sqlglot.dialects.hive
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    DATE_ADD_OR_SUB,
    Dialect,
    NormalizationStrategy,
    approx_count_distinct_sql,
    arg_max_or_min_no_count,
    datestrtodate_sql,
    build_formatted_time,
    if_sql,
    is_parse_json,
    left_to_substring_sql,
    locate_to_strposition,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    no_recursive_cte_sql,
    no_safe_divide_sql,
    no_trycast_sql,
    regexp_extract_sql,
    regexp_replace_sql,
    rename_func,
    right_to_substring_sql,
    strposition_to_locate_sql,
    struct_extract_sql,
    time_format,
    timestrtotime_sql,
    unit_to_str,
    var_map_sql,
)
from sqlglot.transforms import (
    remove_unique_constraints,
    ctas_with_tmp_tables_to_create_tmp_view,
    preprocess,
    move_schema_columns_to_partitioned_by,
)
from sqlglot.helper import seq_get
from sqlglot.tokens import TokenType

# (FuncType, Multiplier)
DATE_DELTA_INTERVAL = {
    "YEAR": ("ADD_MONTHS", 12),
    "MONTH": ("ADD_MONTHS", 1),
    "QUARTER": ("ADD_MONTHS", 3),
    "WEEK": ("DATE_ADD", 7),
    "DAY": ("DATE_ADD", 1),
}

# SQL suffix applied to a difference expressed in seconds, keyed by target unit.
TIME_DIFF_FACTOR = {
    "MILLISECOND": " * 1000",
    "SECOND": "",
    "MINUTE": " / 60",
    "HOUR": " / 3600",
}

# Units whose difference is computed with MONTHS_BETWEEN instead of DATEDIFF.
DIFF_MONTH_SWITCH = ("YEAR", "QUARTER", "MONTH")


def _add_date_sql(self: Hive.Generator, expression: DATE_ADD_OR_SUB) -> str:
    """Generate SQL for date addition/subtraction nodes.

    The unit is looked up in DATE_DELTA_INTERVAL to pick the function name
    (falling back to DATE_ADD) and a multiplier, which is negated for
    exp.DateSub. Integer literal increments are folded into a single literal;
    any other increment is wrapped in a multiplication when the multiplier
    is not 1.
    """
    if isinstance(expression, exp.TsOrDsAdd) and not expression.unit:
        return self.func("DATE_ADD", expression.this, expression.expression)

    unit = expression.text("unit").upper()
    func, multiplier = DATE_DELTA_INTERVAL.get(unit, ("DATE_ADD", 1))

    if isinstance(expression, exp.DateSub):
        multiplier *= -1

    increment = expression.expression
    # Only fold literals that int() can actually parse: a numeric literal such
    # as 1.5 or 1e3 would make int() raise ValueError, so it takes the generic
    # multiplication path below instead.
    if isinstance(increment, exp.Literal) and increment.is_int:
        modified_increment = exp.Literal.number(int(increment.name) * multiplier)
    else:
        modified_increment = increment
        if multiplier != 1:
            modified_increment = exp.Mul(  # type: ignore
                this=modified_increment, expression=exp.Literal.number(multiplier)
            )

    return self.func(func, expression.this, modified_increment)


def _date_diff_sql(self: Hive.Generator, expression: exp.DateDiff | exp.TsOrDsDiff) -> str:
    """Generate SQL for a date/time difference in the expression's unit.

    Units listed in TIME_DIFF_FACTOR are computed from a UNIX_TIMESTAMP
    difference; the remaining units use MONTHS_BETWEEN or DATEDIFF, divided
    by the unit's multiplier and cast to INT when needed.
    """
    unit = expression.text("unit").upper()

    factor = TIME_DIFF_FACTOR.get(unit)
    if factor is not None:
        left = self.sql(expression, "this")
        right = self.sql(expression, "expression")
        sec_diff = f"UNIX_TIMESTAMP({left}) - UNIX_TIMESTAMP({right})"
        return f"({sec_diff}){factor}" if factor else sec_diff

    months_between = unit in DIFF_MONTH_SWITCH
    sql_func = "MONTHS_BETWEEN" if months_between else "DATEDIFF"
    _, multiplier = DATE_DELTA_INTERVAL.get(unit, ("", 1))
    multiplier_sql = f" / {multiplier}" if multiplier > 1 else ""
    diff_sql = f"{sql_func}({self.format_args(expression.this, expression.expression)})"

    if months_between or multiplier_sql:
        # MONTHS_BETWEEN returns a float, so we need to truncate the fractional part.
        # For the same reason, we want to truncate if there's a divisor present.
        diff_sql = f"CAST({diff_sql}{multiplier_sql} AS INT)"

    return diff_sql


def _json_format_sql(self: Hive.Generator, expression: exp.JSONFormat) -> str:
    """Generate SQL for exp.JSONFormat, special-casing PARSE_JSON arguments."""
    this = expression.this

    if is_parse_json(this):
        if this.this.is_string:
            # Since FROM_JSON requires a nested type, we always wrap the json string with
            # an array to ensure that "naked" strings like "'a'" will be handled correctly
            wrapped_json = exp.Literal.string(f"[{this.this.name}]")

            from_json = self.func(
                "FROM_JSON", wrapped_json, self.func("SCHEMA_OF_JSON", wrapped_json)
            )
            to_json = self.func("TO_JSON", from_json)

            # This strips the [, ] delimiters of the dummy array printed by TO_JSON
            return self.func("REGEXP_EXTRACT", to_json, "'^.(.*).$'", "1")
        return self.sql(this)

    return self.func("TO_JSON", this, expression.args.get("options"))


def _array_sort_sql(self: Hive.Generator, expression: exp.ArraySort) -> str:
    """Generate SORT_ARRAY, flagging a comparator argument as unsupported."""
    if expression.expression:
        self.unsupported("Hive SORT_ARRAY does not support a comparator")
    return self.func("SORT_ARRAY", expression.this)


def _property_sql(self: Hive.Generator, expression: exp.Property) -> str:
    """Render a property as ``name=value`` with the name emitted as a string key."""
    return f"{self.property_name(expression, string_key=True)}={self.sql(expression, 'value')}"


def _str_to_unix_sql(self: Hive.Generator, expression: exp.StrToUnix) -> str:
    """Generate UNIX_TIMESTAMP with the format produced by ``time_format("hive")``."""
    return self.func("UNIX_TIMESTAMP", expression.this, time_format("hive")(self, expression))


def _unix_to_time_sql(self: Hive.Generator, expression: exp.UnixToTime) -> str:
    """Generate FROM_UNIXTIME, dividing by a power of ten for non-second scales."""
    timestamp = self.sql(expression, "this")
    scale = expression.args.get("scale")
    if scale in (None, exp.UnixToTime.SECONDS):
        return rename_func("FROM_UNIXTIME")(self, expression)

    return f"FROM_UNIXTIME({timestamp} / POW(10, {scale}))"


def _str_to_date_sql(self: Hive.Generator, expression: exp.StrToDate) -> str:
    """Generate a CAST to DATE, reparsing the string first for non-default formats."""
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    if time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        this = f"FROM_UNIXTIME(UNIX_TIMESTAMP({this}, {time_format}))"
    return f"CAST({this} AS DATE)"


def _str_to_time_sql(self: Hive.Generator, expression: exp.StrToTime) -> str:
    """Generate a CAST to TIMESTAMP, reparsing the string first for non-default formats."""
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    if time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        this = f"FROM_UNIXTIME(UNIX_TIMESTAMP({this}, {time_format}))"
    return f"CAST({this} AS TIMESTAMP)"


def _to_date_sql(self: Hive.Generator, expression: exp.TsOrDsToDate) -> str:
    """Generate TO_DATE, passing the format only when it is not a default one.

    A directly nested exp.TsOrDsToDate is emitted once rather than wrapped in
    a second TO_DATE call.
    """
    time_format = self.format_time(expression)
    if time_format and time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        return self.func("TO_DATE", expression.this, time_format)

    if isinstance(expression.this, exp.TsOrDsToDate):
        return self.sql(expression, "this")

    return self.func("TO_DATE", expression.this)


def _build_with_ignore_nulls(
    exp_class: t.Type[exp.Expression],
) -> t.Callable[[t.List[exp.Expression]], exp.Expression]:
    """Return a builder for ``exp_class`` that honours a boolean second argument.

    The built node takes the first argument as ``this``; when the second
    argument equals ``exp.true()`` the node is wrapped in exp.IgnoreNulls.
    """

    def _parse(args: t.List[exp.Expression]) -> exp.Expression:
        this = exp_class(this=seq_get(args, 0))
        if seq_get(args, 1) == exp.true():
            return exp.IgnoreNulls(this=this)
        return this

    return _parse


class Hive(Dialect):
    """Hive dialect: tokenizer, parser and generator customizations."""

    ALIAS_POST_TABLESAMPLE = True
    IDENTIFIERS_CAN_START_WITH_DIGIT = True
    SUPPORTS_USER_DEFINED_TYPES = False
    SAFE_DIVISION = True

    # https://spark.apache.org/docs/latest/sql-ref-identifier.html#description
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    TIME_MAPPING = {
        "y": "%Y",
        "Y": "%Y",
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "MMM": "%b",
        "MM": "%m",
        "M": "%-m",
        "dd": "%d",
        "d": "%-d",
        "HH": "%H",
        "H": "%-H",
        "hh": "%I",
        "h": "%-I",
        "mm": "%M",
        "m": "%-M",
        "ss": "%S",
        "s": "%-S",
        "SSSSSS": "%f",
        "a": "%p",
        "DD": "%j",
        "D": "%-j",
        "E": "%a",
        "EE": "%a",
        "EEE": "%a",
        "EEEE": "%A",
    }

    DATE_FORMAT = "'yyyy-MM-dd'"
    DATEINT_FORMAT = "'yyyyMMdd'"
    TIME_FORMAT = "'yyyy-MM-dd HH:mm:ss'"

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"']
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ADD ARCHIVE": TokenType.COMMAND,
            "ADD ARCHIVES": TokenType.COMMAND,
            "ADD FILE": TokenType.COMMAND,
            "ADD FILES": TokenType.COMMAND,
            "ADD JAR": TokenType.COMMAND,
            "ADD JARS": TokenType.COMMAND,
            "MSCK REPAIR": TokenType.COMMAND,
            "REFRESH": TokenType.REFRESH,
            "TIMESTAMP AS OF": TokenType.TIMESTAMP_SNAPSHOT,
            "VERSION AS OF": TokenType.VERSION_SNAPSHOT,
            "SERDEPROPERTIES": TokenType.SERDE_PROPERTIES,
        }

        NUMERIC_LITERALS = {
            "L": "BIGINT",
            "S": "SMALLINT",
            "Y": "TINYINT",
            "D": "DOUBLE",
            "F": "FLOAT",
            "BD": "DECIMAL",
        }

    class Parser(parser.Parser):
        LOG_DEFAULTS_TO_LN = True
        STRICT_CAST = False
        VALUES_FOLLOWED_BY_PAREN = False

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "BASE64": exp.ToBase64.from_arg_list,
            "COLLECT_LIST": exp.ArrayAgg.from_arg_list,
            "COLLECT_SET": exp.ArrayUniqueAgg.from_arg_list,
            "DATE_ADD": lambda args: exp.TsOrDsAdd(
                this=seq_get(args, 0), expression=seq_get(args, 1), unit=exp.Literal.string("DAY")
            ),
            "DATE_FORMAT": lambda args: build_formatted_time(exp.TimeToStr, "hive")(
                [
                    exp.TimeStrToTime(this=seq_get(args, 0)),
                    seq_get(args, 1),
                ]
            ),
            "DATE_SUB": lambda args: exp.TsOrDsAdd(
                this=seq_get(args, 0),
                expression=exp.Mul(this=seq_get(args, 1), expression=exp.Literal.number(-1)),
                unit=exp.Literal.string("DAY"),
            ),
            "DATEDIFF": lambda args: exp.DateDiff(
                this=exp.TsOrDsToDate(this=seq_get(args, 0)),
                expression=exp.TsOrDsToDate(this=seq_get(args, 1)),
            ),
            "DAY": lambda args: exp.Day(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
            "FIRST": _build_with_ignore_nulls(exp.First),
            "FIRST_VALUE": _build_with_ignore_nulls(exp.FirstValue),
            "FROM_UNIXTIME": build_formatted_time(exp.UnixToStr, "hive", True),
            "GET_JSON_OBJECT": exp.JSONExtractScalar.from_arg_list,
            "LAST": _build_with_ignore_nulls(exp.Last),
            "LAST_VALUE": _build_with_ignore_nulls(exp.LastValue),
            "LOCATE": locate_to_strposition,
            "MAP": parser.build_var_map,
            "MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate.from_arg_list(args)),
            "PERCENTILE": exp.Quantile.from_arg_list,
            "PERCENTILE_APPROX": exp.ApproxQuantile.from_arg_list,
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2)
            ),
            "SIZE": exp.ArraySize.from_arg_list,
            "SPLIT": exp.RegexpSplit.from_arg_list,
            "STR_TO_MAP": lambda args: exp.StrToMap(
                this=seq_get(args, 0),
                pair_delim=seq_get(args, 1) or exp.Literal.string(","),
                key_value_delim=seq_get(args, 2) or exp.Literal.string(":"),
            ),
            "TO_DATE": build_formatted_time(exp.TsOrDsToDate, "hive"),
            "TO_JSON": exp.JSONFormat.from_arg_list,
            "TRUNC": exp.TimestampTrunc.from_arg_list,
            "UNBASE64": exp.FromBase64.from_arg_list,
            "UNIX_TIMESTAMP": lambda args: build_formatted_time(exp.StrToUnix, "hive", True)(
                args or [exp.CurrentTimestamp()]
            ),
            "YEAR": lambda args: exp.Year(this=exp.TsOrDsToDate.from_arg_list(args)),
        }

        NO_PAREN_FUNCTION_PARSERS = {
            **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
            "TRANSFORM": lambda self: self._parse_transform(),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "SERDEPROPERTIES": lambda self: exp.SerdeProperties(
                expressions=self._parse_wrapped_csv(self._parse_property)
            ),
        }

        def _parse_transform(self) -> t.Optional[exp.Transform | exp.QueryTransform]:
            """Parse a TRANSFORM clause.

            Returns None (after retreating one token) when no left parenthesis
            follows, exp.Transform when the arguments are not followed by
            USING, and exp.QueryTransform otherwise.
            """
            if not self._match(TokenType.L_PAREN, advance=False):
                self._retreat(self._index - 1)
                return None

            args = self._parse_wrapped_csv(self._parse_lambda)
            row_format_before = self._parse_row_format(match_row=True)

            record_writer = None
            if self._match_text_seq("RECORDWRITER"):
                record_writer = self._parse_string()

            if not self._match(TokenType.USING):
                return exp.Transform.from_arg_list(args)

            command_script = self._parse_string()

            self._match(TokenType.ALIAS)
            schema = self._parse_schema()

            row_format_after = self._parse_row_format(match_row=True)
            record_reader = None
            if self._match_text_seq("RECORDREADER"):
                record_reader = self._parse_string()

            return self.expression(
                exp.QueryTransform,
                expressions=args,
                command_script=command_script,
                schema=schema,
                row_format_before=row_format_before,
                record_writer=record_writer,
                row_format_after=row_format_after,
                record_reader=record_reader,
            )

        def _parse_types(
            self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
        ) -> t.Optional[exp.Expression]:
            """
            Spark (and most likely Hive) treats casts to CHAR(length) and VARCHAR(length) as casts to
            STRING in all contexts except for schema definitions. For example, this is in Spark v3.4.0:

                spark-sql (default)> select cast(1234 as varchar(2));
                23/06/06 15:51:18 WARN CharVarcharUtils: The Spark cast operator does not support
                char/varchar type and simply treats them as string type. Please use string type
                directly to avoid confusion. Otherwise, you can set spark.sql.legacy.charVarcharAsString
                to true, so that Spark treat them as string type as same as Spark 3.0 and earlier

                1234
                Time taken: 4.265 seconds, Fetched 1 row(s)

            This shows that Spark doesn't truncate the value into '12', which is inconsistent with
            what other dialects (e.g. postgres) do, so we need to drop the length to transpile correctly.

            Reference: https://spark.apache.org/docs/latest/sql-ref-datatypes.html
            """
            this = super()._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )

            if this and not schema:
                return this.transform(
                    lambda node: (
                        node.replace(exp.DataType.build("text"))
                        if isinstance(node, exp.DataType) and node.is_type("char", "varchar")
                        else node
                    ),
                    copy=False,
                )

            return this

        def _parse_partition_and_order(
            self,
        ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
            """Parse optional partitioning expressions followed by an ordering.

            PARTITION BY and DISTRIBUTE BY are both accepted for the
            partitioning part; a matched SORT BY token stands in for the
            order token of the ordering part.
            """
            return (
                (
                    self._parse_csv(self._parse_assignment)
                    if self._match_set({TokenType.PARTITION_BY, TokenType.DISTRIBUTE_BY})
                    else []
                ),
                super()._parse_order(skip_order_token=self._match(TokenType.SORT_BY)),
            )

        def _parse_parameter(self) -> exp.Parameter:
            """Parse a parameter body of the form ``this[:expression]``, braces optional."""
            self._match(TokenType.L_BRACE)
            this = self._parse_identifier() or self._parse_primary_or_var()
            expression = self._match(TokenType.COLON) and (
                self._parse_identifier() or self._parse_primary_or_var()
            )
            self._match(TokenType.R_BRACE)
            return self.expression(exp.Parameter, this=this, expression=expression)

    class Generator(generator.Generator):
        LIMIT_FETCH = "LIMIT"
        TABLESAMPLE_WITH_METHOD = False
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        INDEX_ON = "ON TABLE"
        EXTRACT_ALLOWS_QUOTES = False
        NVL2_SUPPORTED = False
        LAST_DAY_SUPPORTS_DATE_PART = False
        JSON_PATH_SINGLE_QUOTE_ESCAPE = True
        SUPPORTS_TO_NUMBER = False
        WITH_PROPERTIES_PREFIX = "TBLPROPERTIES"

        EXPRESSIONS_WITHOUT_NESTED_CTES = {
            exp.Insert,
            exp.Select,
            exp.Subquery,
            exp.Union,
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
            exp.JSONPathWildcard,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIT: "BOOLEAN",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
            exp.DataType.Type.ROWVERSION: "BINARY",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIME: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.UTINYINT: "SMALLINT",
            exp.DataType.Type.VARBINARY: "BINARY",
        }

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.Group: transforms.preprocess([transforms.unalias_group]),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_qualify,
                    transforms.eliminate_distinct_on,
                    transforms.unnest_to_explode,
                ]
            ),
            exp.Property: _property_sql,
            exp.AnyValue: rename_func("FIRST"),
            exp.ApproxDistinct: approx_count_distinct_sql,
            exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
            exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
            exp.ArrayConcat: rename_func("CONCAT"),
            exp.ArrayToString: lambda self, e: self.func("CONCAT_WS", e.expression, e.this),
            exp.ArraySize: rename_func("SIZE"),
            exp.ArraySort: _array_sort_sql,
            exp.With: no_recursive_cte_sql,
            exp.DateAdd: _add_date_sql,
            exp.DateDiff: _date_diff_sql,
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateSub: _add_date_sql,
            exp.DateToDi: lambda self,
            e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Hive.DATEINT_FORMAT}) AS INT)",
            exp.DiToDate: lambda self,
            e: f"TO_DATE(CAST({self.sql(e, 'this')} AS STRING), {Hive.DATEINT_FORMAT})",
            exp.FileFormatProperty: lambda self,
            e: f"STORED AS {self.sql(e, 'this') if isinstance(e.this, exp.InputOutputFormat) else e.name.upper()}",
            exp.FromBase64: rename_func("UNBASE64"),
            exp.If: if_sql(),
            exp.ILike: no_ilike_sql,
            exp.IsNan: rename_func("ISNAN"),
            exp.JSONExtract: lambda self, e: self.func("GET_JSON_OBJECT", e.this, e.expression),
            exp.JSONExtractScalar: lambda self, e: self.func(
                "GET_JSON_OBJECT", e.this, e.expression
            ),
            exp.JSONFormat: _json_format_sql,
            exp.Left: left_to_substring_sql,
            exp.Map: var_map_sql,
            exp.Max: max_or_greatest,
            exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
            exp.Min: min_or_least,
            exp.MonthsBetween: lambda self, e: self.func("MONTHS_BETWEEN", e.this, e.expression),
            exp.NotNullColumnConstraint: lambda _, e: (
                "" if e.args.get("allow_null") else "NOT NULL"
            ),
            exp.VarMap: var_map_sql,
            exp.Create: preprocess(
                [
                    remove_unique_constraints,
                    ctas_with_tmp_tables_to_create_tmp_view,
                    move_schema_columns_to_partitioned_by,
                ]
            ),
            exp.Quantile: rename_func("PERCENTILE"),
            exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"),
            exp.RegexpExtract: regexp_extract_sql,
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"),
            exp.RegexpSplit: rename_func("SPLIT"),
            exp.Right: right_to_substring_sql,
            exp.SafeDivide: no_safe_divide_sql,
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.ArrayUniqueAgg: rename_func("COLLECT_SET"),
            exp.Split: lambda self, e: self.func(
                "SPLIT", e.this, self.func("CONCAT", "'\\\\Q'", e.expression)
            ),
            exp.StrPosition: strposition_to_locate_sql,
            exp.StrToDate: _str_to_date_sql,
            exp.StrToTime: _str_to_time_sql,
            exp.StrToUnix: _str_to_unix_sql,
            exp.StructExtract: struct_extract_sql,
            exp.TimeStrToDate: rename_func("TO_DATE"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"),
            exp.TimestampTrunc: lambda self, e: self.func("TRUNC", e.this, unit_to_str(e)),
            exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
            exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"),
            exp.ToBase64: rename_func("BASE64"),
            exp.TsOrDiToDi: lambda self,
            e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS STRING), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: _add_date_sql,
            exp.TsOrDsDiff: _date_diff_sql,
            exp.TsOrDsToDate: _to_date_sql,
            exp.TryCast: no_trycast_sql,
            exp.UnixToStr: lambda self, e: self.func(
                "FROM_UNIXTIME", e.this, time_format("hive")(self, e)
            ),
            exp.UnixToTime: _unix_to_time_sql,
            exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'this')}",
            exp.NumberToStr: rename_func("FORMAT_NUMBER"),
            exp.National: lambda self, e: self.national_sql(e, prefix=""),
            exp.ClusteredColumnConstraint: lambda self,
            e: f"({self.expressions(e, 'this', indent=False)})",
            exp.NonClusteredColumnConstraint: lambda self,
            e: f"({self.expressions(e, 'this', indent=False)})",
            exp.NotForReplicationColumnConstraint: lambda *_: "",
            exp.OnProperty: lambda *_: "",
            exp.PrimaryKeyColumnConstraint: lambda *_: "PRIMARY KEY",
            exp.ParseJSON: lambda self, e: self.sql(e.this),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.FileFormatProperty: exp.Properties.Location.POST_SCHEMA,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
            exp.WithDataProperty: exp.Properties.Location.UNSUPPORTED,
        }

        def _jsonpathkey_sql(self, expression: exp.JSONPathKey) -> str:
            """Render a JSON path key, flagging a wildcard key as unsupported."""
            if isinstance(expression.this, exp.JSONPathWildcard):
                self.unsupported("Unsupported wildcard in JSONPathKey expression")
                return ""

            return super()._jsonpathkey_sql(expression)

        def parameter_sql(self, expression: exp.Parameter) -> str:
            """Render a parameter as ``${this[:expression]}``, or bare inside a SET item."""
            this = self.sql(expression, "this")
            expression_sql = self.sql(expression, "expression")

            parent = expression.parent
            this = f"{this}:{expression_sql}" if expression_sql else this

            if isinstance(parent, exp.EQ) and isinstance(parent.parent, exp.SetItem):
                # We need to produce SET key = value instead of SET ${key} = value
                return this

            return f"${{{this}}}"

        def schema_sql(self, expression: exp.Schema) -> str:
            """Render a schema after clearing ``desc=False`` flags on its Ordered nodes."""
            for ordered in expression.find_all(exp.Ordered):
                if ordered.args.get("desc") is False:
                    ordered.set("desc", None)

            return super().schema_sql(expression)

        def constraint_sql(self, expression: exp.Constraint) -> str:
            """Render ``CONSTRAINT <name> <expressions>`` after removing nested Properties."""
            # Materialize the iterator first, since pop() mutates the tree being walked.
            for prop in list(expression.find_all(exp.Properties)):
                prop.pop()

            this = self.sql(expression, "this")
            expressions = self.expressions(expression, sep=" ", flat=True)
            return f"CONSTRAINT {this} {expressions}"

        def rowformatserdeproperty_sql(self, expression: exp.RowFormatSerdeProperty) -> str:
            """Render ``ROW FORMAT SERDE <this>`` followed by any serde properties."""
            serde_props = self.sql(expression, "serde_properties")
            serde_props = f" {serde_props}" if serde_props else ""
            return f"ROW FORMAT SERDE {self.sql(expression, 'this')}{serde_props}"

        def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
            """Render COLLECT_LIST, unwrapping an exp.Order argument to its operand."""
            return self.func(
                "COLLECT_LIST",
                expression.this.this if isinstance(expression.this, exp.Order) else expression.this,
            )

        def datatype_sql(self, expression: exp.DataType) -> str:
            """Render a data type after normalizing text, temporal and float types."""
            if expression.this in self.PARAMETERIZABLE_TEXT_TYPES and (
                not expression.expressions or expression.expressions[0].name == "MAX"
            ):
                expression = exp.DataType.build("text")
            elif expression.is_type(exp.DataType.Type.TEXT) and expression.expressions:
                expression.set("this", exp.DataType.Type.VARCHAR)
            elif expression.this in exp.DataType.TEMPORAL_TYPES:
                expression = exp.DataType.build(expression.this)
            elif expression.is_type("float"):
                size_expression = expression.find(exp.DataTypeParam)
                if size_expression:
                    size = int(size_expression.name)
                    expression = (
                        exp.DataType.build("float") if size <= 32 else exp.DataType.build("double")
                    )

            return super().datatype_sql(expression)

        def version_sql(self, expression: exp.Version) -> str:
            """Render a version clause with the first ``FOR `` occurrence removed."""
            sql = super().version_sql(expression)
            return sql.replace("FOR ", "", 1)

        def struct_sql(self, expression: exp.Struct) -> str:
            """Render STRUCT(...), keeping only the value side of named fields."""
            values = []

            for e in expression.expressions:
                if isinstance(e, exp.PropertyEQ):
                    self.unsupported("Hive does not support named structs.")
                    values.append(e.expression)
                else:
                    values.append(e)

            return self.func("STRUCT", *values)

        def alterset_sql(self, expression: exp.AlterSet) -> str:
            """Render a SET clause from its serde, expressions, location, file format and tags."""
            exprs = self.expressions(expression, flat=True)
            exprs = f" {exprs}" if exprs else ""
            location = self.sql(expression, "location")
            location = f" LOCATION {location}" if location else ""
            file_format = self.expressions(expression, key="file_format", flat=True, sep=" ")
            file_format = f" FILEFORMAT {file_format}" if file_format else ""
            serde = self.sql(expression, "serde")
            serde = f" SERDE {serde}" if serde else ""
            tags = self.expressions(expression, key="tag", flat=True, sep="")
            tags = f" TAGS {tags}" if tags else ""

            return f"SET{serde}{exprs}{location}{file_format}{tags}"

        def serdeproperties_sql(self, expression: exp.SerdeProperties) -> str:
            """Render ``SERDEPROPERTIES (...)``, prefixed by WITH when the ``with`` arg is set."""
            prefix = "WITH " if expression.args.get("with") else ""
            exprs = self.expressions(expression, flat=True)

            return f"{prefix}SERDEPROPERTIES ({exprs})"
DATE_DELTA_INTERVAL =
{'YEAR': ('ADD_MONTHS', 12), 'MONTH': ('ADD_MONTHS', 1), 'QUARTER': ('ADD_MONTHS', 3), 'WEEK': ('DATE_ADD', 7), 'DAY': ('DATE_ADD', 1)}
TIME_DIFF_FACTOR =
{'MILLISECOND': ' * 1000', 'SECOND': '', 'MINUTE': ' / 60', 'HOUR': ' / 3600'}
DIFF_MONTH_SWITCH =
('YEAR', 'QUARTER', 'MONTH')
193class Hive(Dialect): 194 ALIAS_POST_TABLESAMPLE = True 195 IDENTIFIERS_CAN_START_WITH_DIGIT = True 196 SUPPORTS_USER_DEFINED_TYPES = False 197 SAFE_DIVISION = True 198 199 # https://spark.apache.org/docs/latest/sql-ref-identifier.html#description 200 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE 201 202 TIME_MAPPING = { 203 "y": "%Y", 204 "Y": "%Y", 205 "YYYY": "%Y", 206 "yyyy": "%Y", 207 "YY": "%y", 208 "yy": "%y", 209 "MMMM": "%B", 210 "MMM": "%b", 211 "MM": "%m", 212 "M": "%-m", 213 "dd": "%d", 214 "d": "%-d", 215 "HH": "%H", 216 "H": "%-H", 217 "hh": "%I", 218 "h": "%-I", 219 "mm": "%M", 220 "m": "%-M", 221 "ss": "%S", 222 "s": "%-S", 223 "SSSSSS": "%f", 224 "a": "%p", 225 "DD": "%j", 226 "D": "%-j", 227 "E": "%a", 228 "EE": "%a", 229 "EEE": "%a", 230 "EEEE": "%A", 231 } 232 233 DATE_FORMAT = "'yyyy-MM-dd'" 234 DATEINT_FORMAT = "'yyyyMMdd'" 235 TIME_FORMAT = "'yyyy-MM-dd HH:mm:ss'" 236 237 class Tokenizer(tokens.Tokenizer): 238 QUOTES = ["'", '"'] 239 IDENTIFIERS = ["`"] 240 STRING_ESCAPES = ["\\"] 241 242 SINGLE_TOKENS = { 243 **tokens.Tokenizer.SINGLE_TOKENS, 244 "$": TokenType.PARAMETER, 245 } 246 247 KEYWORDS = { 248 **tokens.Tokenizer.KEYWORDS, 249 "ADD ARCHIVE": TokenType.COMMAND, 250 "ADD ARCHIVES": TokenType.COMMAND, 251 "ADD FILE": TokenType.COMMAND, 252 "ADD FILES": TokenType.COMMAND, 253 "ADD JAR": TokenType.COMMAND, 254 "ADD JARS": TokenType.COMMAND, 255 "MSCK REPAIR": TokenType.COMMAND, 256 "REFRESH": TokenType.REFRESH, 257 "TIMESTAMP AS OF": TokenType.TIMESTAMP_SNAPSHOT, 258 "VERSION AS OF": TokenType.VERSION_SNAPSHOT, 259 "SERDEPROPERTIES": TokenType.SERDE_PROPERTIES, 260 } 261 262 NUMERIC_LITERALS = { 263 "L": "BIGINT", 264 "S": "SMALLINT", 265 "Y": "TINYINT", 266 "D": "DOUBLE", 267 "F": "FLOAT", 268 "BD": "DECIMAL", 269 } 270 271 class Parser(parser.Parser): 272 LOG_DEFAULTS_TO_LN = True 273 STRICT_CAST = False 274 VALUES_FOLLOWED_BY_PAREN = False 275 276 FUNCTIONS = { 277 **parser.Parser.FUNCTIONS, 278 "BASE64": 
exp.ToBase64.from_arg_list, 279 "COLLECT_LIST": exp.ArrayAgg.from_arg_list, 280 "COLLECT_SET": exp.ArrayUniqueAgg.from_arg_list, 281 "DATE_ADD": lambda args: exp.TsOrDsAdd( 282 this=seq_get(args, 0), expression=seq_get(args, 1), unit=exp.Literal.string("DAY") 283 ), 284 "DATE_FORMAT": lambda args: build_formatted_time(exp.TimeToStr, "hive")( 285 [ 286 exp.TimeStrToTime(this=seq_get(args, 0)), 287 seq_get(args, 1), 288 ] 289 ), 290 "DATE_SUB": lambda args: exp.TsOrDsAdd( 291 this=seq_get(args, 0), 292 expression=exp.Mul(this=seq_get(args, 1), expression=exp.Literal.number(-1)), 293 unit=exp.Literal.string("DAY"), 294 ), 295 "DATEDIFF": lambda args: exp.DateDiff( 296 this=exp.TsOrDsToDate(this=seq_get(args, 0)), 297 expression=exp.TsOrDsToDate(this=seq_get(args, 1)), 298 ), 299 "DAY": lambda args: exp.Day(this=exp.TsOrDsToDate(this=seq_get(args, 0))), 300 "FIRST": _build_with_ignore_nulls(exp.First), 301 "FIRST_VALUE": _build_with_ignore_nulls(exp.FirstValue), 302 "FROM_UNIXTIME": build_formatted_time(exp.UnixToStr, "hive", True), 303 "GET_JSON_OBJECT": exp.JSONExtractScalar.from_arg_list, 304 "LAST": _build_with_ignore_nulls(exp.Last), 305 "LAST_VALUE": _build_with_ignore_nulls(exp.LastValue), 306 "LOCATE": locate_to_strposition, 307 "MAP": parser.build_var_map, 308 "MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate.from_arg_list(args)), 309 "PERCENTILE": exp.Quantile.from_arg_list, 310 "PERCENTILE_APPROX": exp.ApproxQuantile.from_arg_list, 311 "REGEXP_EXTRACT": lambda args: exp.RegexpExtract( 312 this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2) 313 ), 314 "SIZE": exp.ArraySize.from_arg_list, 315 "SPLIT": exp.RegexpSplit.from_arg_list, 316 "STR_TO_MAP": lambda args: exp.StrToMap( 317 this=seq_get(args, 0), 318 pair_delim=seq_get(args, 1) or exp.Literal.string(","), 319 key_value_delim=seq_get(args, 2) or exp.Literal.string(":"), 320 ), 321 "TO_DATE": build_formatted_time(exp.TsOrDsToDate, "hive"), 322 "TO_JSON": 
exp.JSONFormat.from_arg_list, 323 "TRUNC": exp.TimestampTrunc.from_arg_list, 324 "UNBASE64": exp.FromBase64.from_arg_list, 325 "UNIX_TIMESTAMP": lambda args: build_formatted_time(exp.StrToUnix, "hive", True)( 326 args or [exp.CurrentTimestamp()] 327 ), 328 "YEAR": lambda args: exp.Year(this=exp.TsOrDsToDate.from_arg_list(args)), 329 } 330 331 NO_PAREN_FUNCTION_PARSERS = { 332 **parser.Parser.NO_PAREN_FUNCTION_PARSERS, 333 "TRANSFORM": lambda self: self._parse_transform(), 334 } 335 336 PROPERTY_PARSERS = { 337 **parser.Parser.PROPERTY_PARSERS, 338 "SERDEPROPERTIES": lambda self: exp.SerdeProperties( 339 expressions=self._parse_wrapped_csv(self._parse_property) 340 ), 341 } 342 343 def _parse_transform(self) -> t.Optional[exp.Transform | exp.QueryTransform]: 344 if not self._match(TokenType.L_PAREN, advance=False): 345 self._retreat(self._index - 1) 346 return None 347 348 args = self._parse_wrapped_csv(self._parse_lambda) 349 row_format_before = self._parse_row_format(match_row=True) 350 351 record_writer = None 352 if self._match_text_seq("RECORDWRITER"): 353 record_writer = self._parse_string() 354 355 if not self._match(TokenType.USING): 356 return exp.Transform.from_arg_list(args) 357 358 command_script = self._parse_string() 359 360 self._match(TokenType.ALIAS) 361 schema = self._parse_schema() 362 363 row_format_after = self._parse_row_format(match_row=True) 364 record_reader = None 365 if self._match_text_seq("RECORDREADER"): 366 record_reader = self._parse_string() 367 368 return self.expression( 369 exp.QueryTransform, 370 expressions=args, 371 command_script=command_script, 372 schema=schema, 373 row_format_before=row_format_before, 374 record_writer=record_writer, 375 row_format_after=row_format_after, 376 record_reader=record_reader, 377 ) 378 379 def _parse_types( 380 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 381 ) -> t.Optional[exp.Expression]: 382 """ 383 Spark (and most likely Hive) treats casts to 
CHAR(length) and VARCHAR(length) as casts to 384 STRING in all contexts except for schema definitions. For example, this is in Spark v3.4.0: 385 386 spark-sql (default)> select cast(1234 as varchar(2)); 387 23/06/06 15:51:18 WARN CharVarcharUtils: The Spark cast operator does not support 388 char/varchar type and simply treats them as string type. Please use string type 389 directly to avoid confusion. Otherwise, you can set spark.sql.legacy.charVarcharAsString 390 to true, so that Spark treat them as string type as same as Spark 3.0 and earlier 391 392 1234 393 Time taken: 4.265 seconds, Fetched 1 row(s) 394 395 This shows that Spark doesn't truncate the value into '12', which is inconsistent with 396 what other dialects (e.g. postgres) do, so we need to drop the length to transpile correctly. 397 398 Reference: https://spark.apache.org/docs/latest/sql-ref-datatypes.html 399 """ 400 this = super()._parse_types( 401 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 402 ) 403 404 if this and not schema: 405 return this.transform( 406 lambda node: ( 407 node.replace(exp.DataType.build("text")) 408 if isinstance(node, exp.DataType) and node.is_type("char", "varchar") 409 else node 410 ), 411 copy=False, 412 ) 413 414 return this 415 416 def _parse_partition_and_order( 417 self, 418 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 419 return ( 420 ( 421 self._parse_csv(self._parse_assignment) 422 if self._match_set({TokenType.PARTITION_BY, TokenType.DISTRIBUTE_BY}) 423 else [] 424 ), 425 super()._parse_order(skip_order_token=self._match(TokenType.SORT_BY)), 426 ) 427 428 def _parse_parameter(self) -> exp.Parameter: 429 self._match(TokenType.L_BRACE) 430 this = self._parse_identifier() or self._parse_primary_or_var() 431 expression = self._match(TokenType.COLON) and ( 432 self._parse_identifier() or self._parse_primary_or_var() 433 ) 434 self._match(TokenType.R_BRACE) 435 return self.expression(exp.Parameter, this=this, 
expression=expression) 436 437 class Generator(generator.Generator): 438 LIMIT_FETCH = "LIMIT" 439 TABLESAMPLE_WITH_METHOD = False 440 JOIN_HINTS = False 441 TABLE_HINTS = False 442 QUERY_HINTS = False 443 INDEX_ON = "ON TABLE" 444 EXTRACT_ALLOWS_QUOTES = False 445 NVL2_SUPPORTED = False 446 LAST_DAY_SUPPORTS_DATE_PART = False 447 JSON_PATH_SINGLE_QUOTE_ESCAPE = True 448 SUPPORTS_TO_NUMBER = False 449 WITH_PROPERTIES_PREFIX = "TBLPROPERTIES" 450 451 EXPRESSIONS_WITHOUT_NESTED_CTES = { 452 exp.Insert, 453 exp.Select, 454 exp.Subquery, 455 exp.Union, 456 } 457 458 SUPPORTED_JSON_PATH_PARTS = { 459 exp.JSONPathKey, 460 exp.JSONPathRoot, 461 exp.JSONPathSubscript, 462 exp.JSONPathWildcard, 463 } 464 465 TYPE_MAPPING = { 466 **generator.Generator.TYPE_MAPPING, 467 exp.DataType.Type.BIT: "BOOLEAN", 468 exp.DataType.Type.DATETIME: "TIMESTAMP", 469 exp.DataType.Type.ROWVERSION: "BINARY", 470 exp.DataType.Type.TEXT: "STRING", 471 exp.DataType.Type.TIME: "TIMESTAMP", 472 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 473 exp.DataType.Type.UTINYINT: "SMALLINT", 474 exp.DataType.Type.VARBINARY: "BINARY", 475 } 476 477 TRANSFORMS = { 478 **generator.Generator.TRANSFORMS, 479 exp.Group: transforms.preprocess([transforms.unalias_group]), 480 exp.Select: transforms.preprocess( 481 [ 482 transforms.eliminate_qualify, 483 transforms.eliminate_distinct_on, 484 transforms.unnest_to_explode, 485 ] 486 ), 487 exp.Property: _property_sql, 488 exp.AnyValue: rename_func("FIRST"), 489 exp.ApproxDistinct: approx_count_distinct_sql, 490 exp.ArgMax: arg_max_or_min_no_count("MAX_BY"), 491 exp.ArgMin: arg_max_or_min_no_count("MIN_BY"), 492 exp.ArrayConcat: rename_func("CONCAT"), 493 exp.ArrayToString: lambda self, e: self.func("CONCAT_WS", e.expression, e.this), 494 exp.ArraySize: rename_func("SIZE"), 495 exp.ArraySort: _array_sort_sql, 496 exp.With: no_recursive_cte_sql, 497 exp.DateAdd: _add_date_sql, 498 exp.DateDiff: _date_diff_sql, 499 exp.DateStrToDate: datestrtodate_sql, 500 exp.DateSub: 
_add_date_sql, 501 exp.DateToDi: lambda self, 502 e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Hive.DATEINT_FORMAT}) AS INT)", 503 exp.DiToDate: lambda self, 504 e: f"TO_DATE(CAST({self.sql(e, 'this')} AS STRING), {Hive.DATEINT_FORMAT})", 505 exp.FileFormatProperty: lambda self, 506 e: f"STORED AS {self.sql(e, 'this') if isinstance(e.this, exp.InputOutputFormat) else e.name.upper()}", 507 exp.FromBase64: rename_func("UNBASE64"), 508 exp.If: if_sql(), 509 exp.ILike: no_ilike_sql, 510 exp.IsNan: rename_func("ISNAN"), 511 exp.JSONExtract: lambda self, e: self.func("GET_JSON_OBJECT", e.this, e.expression), 512 exp.JSONExtractScalar: lambda self, e: self.func( 513 "GET_JSON_OBJECT", e.this, e.expression 514 ), 515 exp.JSONFormat: _json_format_sql, 516 exp.Left: left_to_substring_sql, 517 exp.Map: var_map_sql, 518 exp.Max: max_or_greatest, 519 exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)), 520 exp.Min: min_or_least, 521 exp.MonthsBetween: lambda self, e: self.func("MONTHS_BETWEEN", e.this, e.expression), 522 exp.NotNullColumnConstraint: lambda _, e: ( 523 "" if e.args.get("allow_null") else "NOT NULL" 524 ), 525 exp.VarMap: var_map_sql, 526 exp.Create: preprocess( 527 [ 528 remove_unique_constraints, 529 ctas_with_tmp_tables_to_create_tmp_view, 530 move_schema_columns_to_partitioned_by, 531 ] 532 ), 533 exp.Quantile: rename_func("PERCENTILE"), 534 exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"), 535 exp.RegexpExtract: regexp_extract_sql, 536 exp.RegexpReplace: regexp_replace_sql, 537 exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"), 538 exp.RegexpSplit: rename_func("SPLIT"), 539 exp.Right: right_to_substring_sql, 540 exp.SafeDivide: no_safe_divide_sql, 541 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 542 exp.ArrayUniqueAgg: rename_func("COLLECT_SET"), 543 exp.Split: lambda self, e: self.func( 544 "SPLIT", e.this, self.func("CONCAT", "'\\\\Q'", e.expression) 545 ), 546 exp.StrPosition: 
strposition_to_locate_sql, 547 exp.StrToDate: _str_to_date_sql, 548 exp.StrToTime: _str_to_time_sql, 549 exp.StrToUnix: _str_to_unix_sql, 550 exp.StructExtract: struct_extract_sql, 551 exp.TimeStrToDate: rename_func("TO_DATE"), 552 exp.TimeStrToTime: timestrtotime_sql, 553 exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"), 554 exp.TimestampTrunc: lambda self, e: self.func("TRUNC", e.this, unit_to_str(e)), 555 exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)), 556 exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"), 557 exp.ToBase64: rename_func("BASE64"), 558 exp.TsOrDiToDi: lambda self, 559 e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS STRING), '-', ''), 1, 8) AS INT)", 560 exp.TsOrDsAdd: _add_date_sql, 561 exp.TsOrDsDiff: _date_diff_sql, 562 exp.TsOrDsToDate: _to_date_sql, 563 exp.TryCast: no_trycast_sql, 564 exp.UnixToStr: lambda self, e: self.func( 565 "FROM_UNIXTIME", e.this, time_format("hive")(self, e) 566 ), 567 exp.UnixToTime: _unix_to_time_sql, 568 exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"), 569 exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'this')}", 570 exp.NumberToStr: rename_func("FORMAT_NUMBER"), 571 exp.National: lambda self, e: self.national_sql(e, prefix=""), 572 exp.ClusteredColumnConstraint: lambda self, 573 e: f"({self.expressions(e, 'this', indent=False)})", 574 exp.NonClusteredColumnConstraint: lambda self, 575 e: f"({self.expressions(e, 'this', indent=False)})", 576 exp.NotForReplicationColumnConstraint: lambda *_: "", 577 exp.OnProperty: lambda *_: "", 578 exp.PrimaryKeyColumnConstraint: lambda *_: "PRIMARY KEY", 579 exp.ParseJSON: lambda self, e: self.sql(e.this), 580 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 581 exp.DayOfMonth: rename_func("DAYOFMONTH"), 582 exp.DayOfWeek: rename_func("DAYOFWEEK"), 583 } 584 585 PROPERTIES_LOCATION = { 586 **generator.Generator.PROPERTIES_LOCATION, 587 exp.FileFormatProperty: exp.Properties.Location.POST_SCHEMA, 588 
exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 589 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 590 exp.WithDataProperty: exp.Properties.Location.UNSUPPORTED, 591 } 592 593 def _jsonpathkey_sql(self, expression: exp.JSONPathKey) -> str: 594 if isinstance(expression.this, exp.JSONPathWildcard): 595 self.unsupported("Unsupported wildcard in JSONPathKey expression") 596 return "" 597 598 return super()._jsonpathkey_sql(expression) 599 600 def parameter_sql(self, expression: exp.Parameter) -> str: 601 this = self.sql(expression, "this") 602 expression_sql = self.sql(expression, "expression") 603 604 parent = expression.parent 605 this = f"{this}:{expression_sql}" if expression_sql else this 606 607 if isinstance(parent, exp.EQ) and isinstance(parent.parent, exp.SetItem): 608 # We need to produce SET key = value instead of SET ${key} = value 609 return this 610 611 return f"${{{this}}}" 612 613 def schema_sql(self, expression: exp.Schema) -> str: 614 for ordered in expression.find_all(exp.Ordered): 615 if ordered.args.get("desc") is False: 616 ordered.set("desc", None) 617 618 return super().schema_sql(expression) 619 620 def constraint_sql(self, expression: exp.Constraint) -> str: 621 for prop in list(expression.find_all(exp.Properties)): 622 prop.pop() 623 624 this = self.sql(expression, "this") 625 expressions = self.expressions(expression, sep=" ", flat=True) 626 return f"CONSTRAINT {this} {expressions}" 627 628 def rowformatserdeproperty_sql(self, expression: exp.RowFormatSerdeProperty) -> str: 629 serde_props = self.sql(expression, "serde_properties") 630 serde_props = f" {serde_props}" if serde_props else "" 631 return f"ROW FORMAT SERDE {self.sql(expression, 'this')}{serde_props}" 632 633 def arrayagg_sql(self, expression: exp.ArrayAgg) -> str: 634 return self.func( 635 "COLLECT_LIST", 636 expression.this.this if isinstance(expression.this, exp.Order) else expression.this, 637 ) 638 639 def datatype_sql(self, expression: 
exp.DataType) -> str: 640 if expression.this in self.PARAMETERIZABLE_TEXT_TYPES and ( 641 not expression.expressions or expression.expressions[0].name == "MAX" 642 ): 643 expression = exp.DataType.build("text") 644 elif expression.is_type(exp.DataType.Type.TEXT) and expression.expressions: 645 expression.set("this", exp.DataType.Type.VARCHAR) 646 elif expression.this in exp.DataType.TEMPORAL_TYPES: 647 expression = exp.DataType.build(expression.this) 648 elif expression.is_type("float"): 649 size_expression = expression.find(exp.DataTypeParam) 650 if size_expression: 651 size = int(size_expression.name) 652 expression = ( 653 exp.DataType.build("float") if size <= 32 else exp.DataType.build("double") 654 ) 655 656 return super().datatype_sql(expression) 657 658 def version_sql(self, expression: exp.Version) -> str: 659 sql = super().version_sql(expression) 660 return sql.replace("FOR ", "", 1) 661 662 def struct_sql(self, expression: exp.Struct) -> str: 663 values = [] 664 665 for i, e in enumerate(expression.expressions): 666 if isinstance(e, exp.PropertyEQ): 667 self.unsupported("Hive does not support named structs.") 668 values.append(e.expression) 669 else: 670 values.append(e) 671 672 return self.func("STRUCT", *values) 673 674 def alterset_sql(self, expression: exp.AlterSet) -> str: 675 exprs = self.expressions(expression, flat=True) 676 exprs = f" {exprs}" if exprs else "" 677 location = self.sql(expression, "location") 678 location = f" LOCATION {location}" if location else "" 679 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 680 file_format = f" FILEFORMAT {file_format}" if file_format else "" 681 serde = self.sql(expression, "serde") 682 serde = f" SERDE {serde}" if serde else "" 683 tags = self.expressions(expression, key="tag", flat=True, sep="") 684 tags = f" TAGS {tags}" if tags else "" 685 686 return f"SET{serde}{exprs}{location}{file_format}{tags}" 687 688 def serdeproperties_sql(self, expression: 
exp.SerdeProperties) -> str: 689 prefix = "WITH " if expression.args.get("with") else "" 690 exprs = self.expressions(expression, flat=True) 691 692 return f"{prefix}SERDEPROPERTIES ({exprs})"
NORMALIZATION_STRATEGY =
<NormalizationStrategy.CASE_INSENSITIVE: 'CASE_INSENSITIVE'>
Specifies the strategy according to which identifiers should be normalized.
TIME_MAPPING: Dict[str, str] =
{'y': '%Y', 'Y': '%Y', 'YYYY': '%Y', 'yyyy': '%Y', 'YY': '%y', 'yy': '%y', 'MMMM': '%B', 'MMM': '%b', 'MM': '%m', 'M': '%-m', 'dd': '%d', 'd': '%-d', 'HH': '%H', 'H': '%-H', 'hh': '%I', 'h': '%-I', 'mm': '%M', 'm': '%-M', 'ss': '%S', 's': '%-S', 'SSSSSS': '%f', 'a': '%p', 'DD': '%j', 'D': '%-j', 'E': '%a', 'EE': '%a', 'EEE': '%a', 'EEEE': '%A'}
Associates this dialect's time formats with their equivalent Python strftime
formats.
UNESCAPED_SEQUENCES: Dict[str, str] =
{'\\a': '\x07', '\\b': '\x08', '\\f': '\x0c', '\\n': '\n', '\\r': '\r', '\\t': '\t', '\\v': '\x0b', '\\\\': '\\'}
Mapping of an escaped sequence (e.g. the two characters `\n`) to its unescaped version (e.g. an actual newline character).
tokenizer_class =
<class 'Hive.Tokenizer'>
parser_class =
<class 'Hive.Parser'>
generator_class =
<class 'Hive.Generator'>
TIME_TRIE: Dict =
{'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
FORMAT_TRIE: Dict =
{'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
INVERSE_TIME_MAPPING: Dict[str, str] =
{'%Y': 'yyyy', '%y': 'yy', '%B': 'MMMM', '%b': 'MMM', '%m': 'MM', '%-m': 'M', '%d': 'dd', '%-d': 'd', '%H': 'HH', '%-H': 'H', '%I': 'hh', '%-I': 'h', '%M': 'mm', '%-M': 'm', '%S': 'ss', '%-S': 's', '%f': 'SSSSSS', '%p': 'a', '%j': 'DD', '%-j': 'D', '%a': 'EEE', '%A': 'EEEE'}
INVERSE_TIME_TRIE: Dict =
{'%': {'Y': {0: True}, 'y': {0: True}, 'B': {0: True}, 'b': {0: True}, 'm': {0: True}, '-': {'m': {0: True}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'j': {0: True}}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'f': {0: True}, 'p': {0: True}, 'j': {0: True}, 'a': {0: True}, 'A': {0: True}}}
ESCAPED_SEQUENCES: Dict[str, str] =
{'\x07': '\\a', '\x08': '\\b', '\x0c': '\\f', '\n': '\\n', '\r': '\\r', '\t': '\\t', '\x0b': '\\v', '\\': '\\\\'}
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- TABLESAMPLE_SIZE_IS_PERCENT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_SEMI_ANTI_JOIN
- NORMALIZE_FUNCTIONS
- LOG_BASE_FIRST
- NULL_ORDERING
- TYPED_DIVISION
- CONCAT_COALESCE
- HEX_LOWERCASE
- FORMAT_MAPPING
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- COPY_PARAMS_ARE_CSV
- get_or_raise
- format_time
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
class Tokenizer(tokens.Tokenizer):
    """Tokenizer settings for the Hive dialect.

    Everything here is declarative: the class only overrides lookup tables
    that the base ``tokens.Tokenizer`` reads.
    """

    # String literals may use single or double quotes; identifiers use backticks.
    QUOTES = ["'", '"']
    IDENTIFIERS = ["`"]
    # A backslash escapes the following character inside a string literal.
    STRING_ESCAPES = ["\\"]

    SINGLE_TOKENS = {
        **tokens.Tokenizer.SINGLE_TOKENS,
        # `$` is tokenized as PARAMETER (in addition to the inherited `@`).
        "$": TokenType.PARAMETER,
    }

    KEYWORDS = {
        **tokens.Tokenizer.KEYWORDS,
        # Statements that are all tokenized as COMMAND.
        **dict.fromkeys(
            (
                "ADD ARCHIVE",
                "ADD ARCHIVES",
                "ADD FILE",
                "ADD FILES",
                "ADD JAR",
                "ADD JARS",
                "MSCK REPAIR",
            ),
            TokenType.COMMAND,
        ),
        "REFRESH": TokenType.REFRESH,
        "TIMESTAMP AS OF": TokenType.TIMESTAMP_SNAPSHOT,
        "VERSION AS OF": TokenType.VERSION_SNAPSHOT,
        "SERDEPROPERTIES": TokenType.SERDE_PROPERTIES,
    }

    # Numeric literal suffix -> name of the type it denotes.
    NUMERIC_LITERALS = {
        "L": "BIGINT",
        "S": "SMALLINT",
        "Y": "TINYINT",
        "D": "DOUBLE",
        "F": "FLOAT",
        "BD": "DECIMAL",
    }
SINGLE_TOKENS =
{'(': <TokenType.L_PAREN: 'L_PAREN'>, ')': <TokenType.R_PAREN: 'R_PAREN'>, '[': <TokenType.L_BRACKET: 'L_BRACKET'>, ']': <TokenType.R_BRACKET: 'R_BRACKET'>, '{': <TokenType.L_BRACE: 'L_BRACE'>, '}': <TokenType.R_BRACE: 'R_BRACE'>, '&': <TokenType.AMP: 'AMP'>, '^': <TokenType.CARET: 'CARET'>, ':': <TokenType.COLON: 'COLON'>, ',': <TokenType.COMMA: 'COMMA'>, '.': <TokenType.DOT: 'DOT'>, '-': <TokenType.DASH: 'DASH'>, '=': <TokenType.EQ: 'EQ'>, '>': <TokenType.GT: 'GT'>, '<': <TokenType.LT: 'LT'>, '%': <TokenType.MOD: 'MOD'>, '!': <TokenType.NOT: 'NOT'>, '|': <TokenType.PIPE: 'PIPE'>, '+': <TokenType.PLUS: 'PLUS'>, ';': <TokenType.SEMICOLON: 'SEMICOLON'>, '/': <TokenType.SLASH: 'SLASH'>, '\\': <TokenType.BACKSLASH: 'BACKSLASH'>, '*': <TokenType.STAR: 'STAR'>, '~': <TokenType.TILDA: 'TILDA'>, '?': <TokenType.PLACEHOLDER: 'PLACEHOLDER'>, '@': <TokenType.PARAMETER: 'PARAMETER'>, '#': <TokenType.HASH: 'HASH'>, "'": <TokenType.UNKNOWN: 'UNKNOWN'>, '`': <TokenType.UNKNOWN: 'UNKNOWN'>, '"': <TokenType.UNKNOWN: 'UNKNOWN'>, '$': <TokenType.PARAMETER: 'PARAMETER'>}
KEYWORDS =
{'{%': <TokenType.BLOCK_START: 'BLOCK_START'>, '{%+': <TokenType.BLOCK_START: 'BLOCK_START'>, '{%-': <TokenType.BLOCK_START: 'BLOCK_START'>, '%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '+%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '-%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '{{+': <TokenType.BLOCK_START: 'BLOCK_START'>, '{{-': <TokenType.BLOCK_START: 'BLOCK_START'>, '+}}': <TokenType.BLOCK_END: 'BLOCK_END'>, '-}}': <TokenType.BLOCK_END: 'BLOCK_END'>, '/*+': <TokenType.HINT: 'HINT'>, '==': <TokenType.EQ: 'EQ'>, '::': <TokenType.DCOLON: 'DCOLON'>, '||': <TokenType.DPIPE: 'DPIPE'>, '>=': <TokenType.GTE: 'GTE'>, '<=': <TokenType.LTE: 'LTE'>, '<>': <TokenType.NEQ: 'NEQ'>, '!=': <TokenType.NEQ: 'NEQ'>, ':=': <TokenType.COLON_EQ: 'COLON_EQ'>, '<=>': <TokenType.NULLSAFE_EQ: 'NULLSAFE_EQ'>, '->': <TokenType.ARROW: 'ARROW'>, '->>': <TokenType.DARROW: 'DARROW'>, '=>': <TokenType.FARROW: 'FARROW'>, '#>': <TokenType.HASH_ARROW: 'HASH_ARROW'>, '#>>': <TokenType.DHASH_ARROW: 'DHASH_ARROW'>, '<->': <TokenType.LR_ARROW: 'LR_ARROW'>, '&&': <TokenType.DAMP: 'DAMP'>, '??': <TokenType.DQMARK: 'DQMARK'>, 'ALL': <TokenType.ALL: 'ALL'>, 'ALWAYS': <TokenType.ALWAYS: 'ALWAYS'>, 'AND': <TokenType.AND: 'AND'>, 'ANTI': <TokenType.ANTI: 'ANTI'>, 'ANY': <TokenType.ANY: 'ANY'>, 'ASC': <TokenType.ASC: 'ASC'>, 'AS': <TokenType.ALIAS: 'ALIAS'>, 'ASOF': <TokenType.ASOF: 'ASOF'>, 'AUTOINCREMENT': <TokenType.AUTO_INCREMENT: 'AUTO_INCREMENT'>, 'AUTO_INCREMENT': <TokenType.AUTO_INCREMENT: 'AUTO_INCREMENT'>, 'BEGIN': <TokenType.BEGIN: 'BEGIN'>, 'BETWEEN': <TokenType.BETWEEN: 'BETWEEN'>, 'CACHE': <TokenType.CACHE: 'CACHE'>, 'UNCACHE': <TokenType.UNCACHE: 'UNCACHE'>, 'CASE': <TokenType.CASE: 'CASE'>, 'CHARACTER SET': <TokenType.CHARACTER_SET: 'CHARACTER_SET'>, 'CLUSTER BY': <TokenType.CLUSTER_BY: 'CLUSTER_BY'>, 'COLLATE': <TokenType.COLLATE: 'COLLATE'>, 'COLUMN': <TokenType.COLUMN: 'COLUMN'>, 'COMMIT': <TokenType.COMMIT: 'COMMIT'>, 'CONNECT BY': <TokenType.CONNECT_BY: 'CONNECT_BY'>, 'CONSTRAINT': 
<TokenType.CONSTRAINT: 'CONSTRAINT'>, 'COPY': <TokenType.COPY: 'COPY'>, 'CREATE': <TokenType.CREATE: 'CREATE'>, 'CROSS': <TokenType.CROSS: 'CROSS'>, 'CUBE': <TokenType.CUBE: 'CUBE'>, 'CURRENT_DATE': <TokenType.CURRENT_DATE: 'CURRENT_DATE'>, 'CURRENT_TIME': <TokenType.CURRENT_TIME: 'CURRENT_TIME'>, 'CURRENT_TIMESTAMP': <TokenType.CURRENT_TIMESTAMP: 'CURRENT_TIMESTAMP'>, 'CURRENT_USER': <TokenType.CURRENT_USER: 'CURRENT_USER'>, 'DATABASE': <TokenType.DATABASE: 'DATABASE'>, 'DEFAULT': <TokenType.DEFAULT: 'DEFAULT'>, 'DELETE': <TokenType.DELETE: 'DELETE'>, 'DESC': <TokenType.DESC: 'DESC'>, 'DESCRIBE': <TokenType.DESCRIBE: 'DESCRIBE'>, 'DISTINCT': <TokenType.DISTINCT: 'DISTINCT'>, 'DISTRIBUTE BY': <TokenType.DISTRIBUTE_BY: 'DISTRIBUTE_BY'>, 'DIV': <TokenType.DIV: 'DIV'>, 'DROP': <TokenType.DROP: 'DROP'>, 'ELSE': <TokenType.ELSE: 'ELSE'>, 'END': <TokenType.END: 'END'>, 'ENUM': <TokenType.ENUM: 'ENUM'>, 'ESCAPE': <TokenType.ESCAPE: 'ESCAPE'>, 'EXCEPT': <TokenType.EXCEPT: 'EXCEPT'>, 'EXECUTE': <TokenType.EXECUTE: 'EXECUTE'>, 'EXISTS': <TokenType.EXISTS: 'EXISTS'>, 'FALSE': <TokenType.FALSE: 'FALSE'>, 'FETCH': <TokenType.FETCH: 'FETCH'>, 'FILTER': <TokenType.FILTER: 'FILTER'>, 'FIRST': <TokenType.FIRST: 'FIRST'>, 'FULL': <TokenType.FULL: 'FULL'>, 'FUNCTION': <TokenType.FUNCTION: 'FUNCTION'>, 'FOR': <TokenType.FOR: 'FOR'>, 'FOREIGN KEY': <TokenType.FOREIGN_KEY: 'FOREIGN_KEY'>, 'FORMAT': <TokenType.FORMAT: 'FORMAT'>, 'FROM': <TokenType.FROM: 'FROM'>, 'GEOGRAPHY': <TokenType.GEOGRAPHY: 'GEOGRAPHY'>, 'GEOMETRY': <TokenType.GEOMETRY: 'GEOMETRY'>, 'GLOB': <TokenType.GLOB: 'GLOB'>, 'GROUP BY': <TokenType.GROUP_BY: 'GROUP_BY'>, 'GROUPING SETS': <TokenType.GROUPING_SETS: 'GROUPING_SETS'>, 'HAVING': <TokenType.HAVING: 'HAVING'>, 'ILIKE': <TokenType.ILIKE: 'ILIKE'>, 'IN': <TokenType.IN: 'IN'>, 'INDEX': <TokenType.INDEX: 'INDEX'>, 'INET': <TokenType.INET: 'INET'>, 'INNER': <TokenType.INNER: 'INNER'>, 'INSERT': <TokenType.INSERT: 'INSERT'>, 'INTERVAL': <TokenType.INTERVAL: 'INTERVAL'>, 
'INTERSECT': <TokenType.INTERSECT: 'INTERSECT'>, 'INTO': <TokenType.INTO: 'INTO'>, 'IS': <TokenType.IS: 'IS'>, 'ISNULL': <TokenType.ISNULL: 'ISNULL'>, 'JOIN': <TokenType.JOIN: 'JOIN'>, 'KEEP': <TokenType.KEEP: 'KEEP'>, 'KILL': <TokenType.KILL: 'KILL'>, 'LATERAL': <TokenType.LATERAL: 'LATERAL'>, 'LEFT': <TokenType.LEFT: 'LEFT'>, 'LIKE': <TokenType.LIKE: 'LIKE'>, 'LIMIT': <TokenType.LIMIT: 'LIMIT'>, 'LOAD': <TokenType.LOAD: 'LOAD'>, 'LOCK': <TokenType.LOCK: 'LOCK'>, 'MERGE': <TokenType.MERGE: 'MERGE'>, 'NATURAL': <TokenType.NATURAL: 'NATURAL'>, 'NEXT': <TokenType.NEXT: 'NEXT'>, 'NOT': <TokenType.NOT: 'NOT'>, 'NOTNULL': <TokenType.NOTNULL: 'NOTNULL'>, 'NULL': <TokenType.NULL: 'NULL'>, 'OBJECT': <TokenType.OBJECT: 'OBJECT'>, 'OFFSET': <TokenType.OFFSET: 'OFFSET'>, 'ON': <TokenType.ON: 'ON'>, 'OR': <TokenType.OR: 'OR'>, 'XOR': <TokenType.XOR: 'XOR'>, 'ORDER BY': <TokenType.ORDER_BY: 'ORDER_BY'>, 'ORDINALITY': <TokenType.ORDINALITY: 'ORDINALITY'>, 'OUTER': <TokenType.OUTER: 'OUTER'>, 'OVER': <TokenType.OVER: 'OVER'>, 'OVERLAPS': <TokenType.OVERLAPS: 'OVERLAPS'>, 'OVERWRITE': <TokenType.OVERWRITE: 'OVERWRITE'>, 'PARTITION': <TokenType.PARTITION: 'PARTITION'>, 'PARTITION BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PARTITIONED BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PARTITIONED_BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PERCENT': <TokenType.PERCENT: 'PERCENT'>, 'PIVOT': <TokenType.PIVOT: 'PIVOT'>, 'PRAGMA': <TokenType.PRAGMA: 'PRAGMA'>, 'PRIMARY KEY': <TokenType.PRIMARY_KEY: 'PRIMARY_KEY'>, 'PROCEDURE': <TokenType.PROCEDURE: 'PROCEDURE'>, 'QUALIFY': <TokenType.QUALIFY: 'QUALIFY'>, 'RANGE': <TokenType.RANGE: 'RANGE'>, 'RECURSIVE': <TokenType.RECURSIVE: 'RECURSIVE'>, 'REGEXP': <TokenType.RLIKE: 'RLIKE'>, 'REPLACE': <TokenType.REPLACE: 'REPLACE'>, 'RETURNING': <TokenType.RETURNING: 'RETURNING'>, 'REFERENCES': <TokenType.REFERENCES: 'REFERENCES'>, 'RIGHT': <TokenType.RIGHT: 'RIGHT'>, 'RLIKE': <TokenType.RLIKE: 'RLIKE'>, 'ROLLBACK': <TokenType.ROLLBACK: 
'ROLLBACK'>, 'ROLLUP': <TokenType.ROLLUP: 'ROLLUP'>, 'ROW': <TokenType.ROW: 'ROW'>, 'ROWS': <TokenType.ROWS: 'ROWS'>, 'SCHEMA': <TokenType.SCHEMA: 'SCHEMA'>, 'SELECT': <TokenType.SELECT: 'SELECT'>, 'SEMI': <TokenType.SEMI: 'SEMI'>, 'SET': <TokenType.SET: 'SET'>, 'SETTINGS': <TokenType.SETTINGS: 'SETTINGS'>, 'SHOW': <TokenType.SHOW: 'SHOW'>, 'SIMILAR TO': <TokenType.SIMILAR_TO: 'SIMILAR_TO'>, 'SOME': <TokenType.SOME: 'SOME'>, 'SORT BY': <TokenType.SORT_BY: 'SORT_BY'>, 'START WITH': <TokenType.START_WITH: 'START_WITH'>, 'TABLE': <TokenType.TABLE: 'TABLE'>, 'TABLESAMPLE': <TokenType.TABLE_SAMPLE: 'TABLE_SAMPLE'>, 'TEMP': <TokenType.TEMPORARY: 'TEMPORARY'>, 'TEMPORARY': <TokenType.TEMPORARY: 'TEMPORARY'>, 'THEN': <TokenType.THEN: 'THEN'>, 'TRUE': <TokenType.TRUE: 'TRUE'>, 'TRUNCATE': <TokenType.TRUNCATE: 'TRUNCATE'>, 'UNION': <TokenType.UNION: 'UNION'>, 'UNKNOWN': <TokenType.UNKNOWN: 'UNKNOWN'>, 'UNNEST': <TokenType.UNNEST: 'UNNEST'>, 'UNPIVOT': <TokenType.UNPIVOT: 'UNPIVOT'>, 'UPDATE': <TokenType.UPDATE: 'UPDATE'>, 'USE': <TokenType.USE: 'USE'>, 'USING': <TokenType.USING: 'USING'>, 'UUID': <TokenType.UUID: 'UUID'>, 'VALUES': <TokenType.VALUES: 'VALUES'>, 'VIEW': <TokenType.VIEW: 'VIEW'>, 'VOLATILE': <TokenType.VOLATILE: 'VOLATILE'>, 'WHEN': <TokenType.WHEN: 'WHEN'>, 'WHERE': <TokenType.WHERE: 'WHERE'>, 'WINDOW': <TokenType.WINDOW: 'WINDOW'>, 'WITH': <TokenType.WITH: 'WITH'>, 'APPLY': <TokenType.APPLY: 'APPLY'>, 'ARRAY': <TokenType.ARRAY: 'ARRAY'>, 'BIT': <TokenType.BIT: 'BIT'>, 'BOOL': <TokenType.BOOLEAN: 'BOOLEAN'>, 'BOOLEAN': <TokenType.BOOLEAN: 'BOOLEAN'>, 'BYTE': <TokenType.TINYINT: 'TINYINT'>, 'MEDIUMINT': <TokenType.MEDIUMINT: 'MEDIUMINT'>, 'INT1': <TokenType.TINYINT: 'TINYINT'>, 'TINYINT': <TokenType.TINYINT: 'TINYINT'>, 'INT16': <TokenType.SMALLINT: 'SMALLINT'>, 'SHORT': <TokenType.SMALLINT: 'SMALLINT'>, 'SMALLINT': <TokenType.SMALLINT: 'SMALLINT'>, 'INT128': <TokenType.INT128: 'INT128'>, 'HUGEINT': <TokenType.INT128: 'INT128'>, 'INT2': <TokenType.SMALLINT: 
'SMALLINT'>, 'INTEGER': <TokenType.INT: 'INT'>, 'INT': <TokenType.INT: 'INT'>, 'INT4': <TokenType.INT: 'INT'>, 'INT32': <TokenType.INT: 'INT'>, 'INT64': <TokenType.BIGINT: 'BIGINT'>, 'LONG': <TokenType.BIGINT: 'BIGINT'>, 'BIGINT': <TokenType.BIGINT: 'BIGINT'>, 'INT8': <TokenType.TINYINT: 'TINYINT'>, 'UINT': <TokenType.UINT: 'UINT'>, 'DEC': <TokenType.DECIMAL: 'DECIMAL'>, 'DECIMAL': <TokenType.DECIMAL: 'DECIMAL'>, 'BIGDECIMAL': <TokenType.BIGDECIMAL: 'BIGDECIMAL'>, 'BIGNUMERIC': <TokenType.BIGDECIMAL: 'BIGDECIMAL'>, 'LIST': <TokenType.LIST: 'LIST'>, 'MAP': <TokenType.MAP: 'MAP'>, 'NULLABLE': <TokenType.NULLABLE: 'NULLABLE'>, 'NUMBER': <TokenType.DECIMAL: 'DECIMAL'>, 'NUMERIC': <TokenType.DECIMAL: 'DECIMAL'>, 'FIXED': <TokenType.DECIMAL: 'DECIMAL'>, 'REAL': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT4': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT8': <TokenType.DOUBLE: 'DOUBLE'>, 'DOUBLE': <TokenType.DOUBLE: 'DOUBLE'>, 'DOUBLE PRECISION': <TokenType.DOUBLE: 'DOUBLE'>, 'JSON': <TokenType.JSON: 'JSON'>, 'JSONB': <TokenType.JSONB: 'JSONB'>, 'CHAR': <TokenType.CHAR: 'CHAR'>, 'CHARACTER': <TokenType.CHAR: 'CHAR'>, 'NCHAR': <TokenType.NCHAR: 'NCHAR'>, 'VARCHAR': <TokenType.VARCHAR: 'VARCHAR'>, 'VARCHAR2': <TokenType.VARCHAR: 'VARCHAR'>, 'NVARCHAR': <TokenType.NVARCHAR: 'NVARCHAR'>, 'NVARCHAR2': <TokenType.NVARCHAR: 'NVARCHAR'>, 'BPCHAR': <TokenType.BPCHAR: 'BPCHAR'>, 'STR': <TokenType.TEXT: 'TEXT'>, 'STRING': <TokenType.TEXT: 'TEXT'>, 'TEXT': <TokenType.TEXT: 'TEXT'>, 'LONGTEXT': <TokenType.LONGTEXT: 'LONGTEXT'>, 'MEDIUMTEXT': <TokenType.MEDIUMTEXT: 'MEDIUMTEXT'>, 'TINYTEXT': <TokenType.TINYTEXT: 'TINYTEXT'>, 'CLOB': <TokenType.TEXT: 'TEXT'>, 'LONGVARCHAR': <TokenType.TEXT: 'TEXT'>, 'BINARY': <TokenType.BINARY: 'BINARY'>, 'BLOB': <TokenType.VARBINARY: 'VARBINARY'>, 'LONGBLOB': <TokenType.LONGBLOB: 'LONGBLOB'>, 'MEDIUMBLOB': <TokenType.MEDIUMBLOB: 'MEDIUMBLOB'>, 'TINYBLOB': <TokenType.TINYBLOB: 'TINYBLOB'>, 'BYTEA': <TokenType.VARBINARY: 'VARBINARY'>, 
'VARBINARY': <TokenType.VARBINARY: 'VARBINARY'>, 'TIME': <TokenType.TIME: 'TIME'>, 'TIMETZ': <TokenType.TIMETZ: 'TIMETZ'>, 'TIMESTAMP': <TokenType.TIMESTAMP: 'TIMESTAMP'>, 'TIMESTAMPTZ': <TokenType.TIMESTAMPTZ: 'TIMESTAMPTZ'>, 'TIMESTAMPLTZ': <TokenType.TIMESTAMPLTZ: 'TIMESTAMPLTZ'>, 'TIMESTAMP_LTZ': <TokenType.TIMESTAMPLTZ: 'TIMESTAMPLTZ'>, 'TIMESTAMPNTZ': <TokenType.TIMESTAMPNTZ: 'TIMESTAMPNTZ'>, 'TIMESTAMP_NTZ': <TokenType.TIMESTAMPNTZ: 'TIMESTAMPNTZ'>, 'DATE': <TokenType.DATE: 'DATE'>, 'DATETIME': <TokenType.DATETIME: 'DATETIME'>, 'INT4RANGE': <TokenType.INT4RANGE: 'INT4RANGE'>, 'INT4MULTIRANGE': <TokenType.INT4MULTIRANGE: 'INT4MULTIRANGE'>, 'INT8RANGE': <TokenType.INT8RANGE: 'INT8RANGE'>, 'INT8MULTIRANGE': <TokenType.INT8MULTIRANGE: 'INT8MULTIRANGE'>, 'NUMRANGE': <TokenType.NUMRANGE: 'NUMRANGE'>, 'NUMMULTIRANGE': <TokenType.NUMMULTIRANGE: 'NUMMULTIRANGE'>, 'TSRANGE': <TokenType.TSRANGE: 'TSRANGE'>, 'TSMULTIRANGE': <TokenType.TSMULTIRANGE: 'TSMULTIRANGE'>, 'TSTZRANGE': <TokenType.TSTZRANGE: 'TSTZRANGE'>, 'TSTZMULTIRANGE': <TokenType.TSTZMULTIRANGE: 'TSTZMULTIRANGE'>, 'DATERANGE': <TokenType.DATERANGE: 'DATERANGE'>, 'DATEMULTIRANGE': <TokenType.DATEMULTIRANGE: 'DATEMULTIRANGE'>, 'UNIQUE': <TokenType.UNIQUE: 'UNIQUE'>, 'STRUCT': <TokenType.STRUCT: 'STRUCT'>, 'SEQUENCE': <TokenType.SEQUENCE: 'SEQUENCE'>, 'VARIANT': <TokenType.VARIANT: 'VARIANT'>, 'ALTER': <TokenType.ALTER: 'ALTER'>, 'ANALYZE': <TokenType.COMMAND: 'COMMAND'>, 'CALL': <TokenType.COMMAND: 'COMMAND'>, 'COMMENT': <TokenType.COMMENT: 'COMMENT'>, 'EXPLAIN': <TokenType.COMMAND: 'COMMAND'>, 'GRANT': <TokenType.COMMAND: 'COMMAND'>, 'OPTIMIZE': <TokenType.COMMAND: 'COMMAND'>, 'PREPARE': <TokenType.COMMAND: 'COMMAND'>, 'VACUUM': <TokenType.COMMAND: 'COMMAND'>, 'USER-DEFINED': <TokenType.USERDEFINED: 'USERDEFINED'>, 'FOR VERSION': <TokenType.VERSION_SNAPSHOT: 'VERSION_SNAPSHOT'>, 'FOR TIMESTAMP': <TokenType.TIMESTAMP_SNAPSHOT: 'TIMESTAMP_SNAPSHOT'>, 'ADD ARCHIVE': <TokenType.COMMAND: 'COMMAND'>, 'ADD 
ARCHIVES': <TokenType.COMMAND: 'COMMAND'>, 'ADD FILE': <TokenType.COMMAND: 'COMMAND'>, 'ADD FILES': <TokenType.COMMAND: 'COMMAND'>, 'ADD JAR': <TokenType.COMMAND: 'COMMAND'>, 'ADD JARS': <TokenType.COMMAND: 'COMMAND'>, 'MSCK REPAIR': <TokenType.COMMAND: 'COMMAND'>, 'REFRESH': <TokenType.REFRESH: 'REFRESH'>, 'TIMESTAMP AS OF': <TokenType.TIMESTAMP_SNAPSHOT: 'TIMESTAMP_SNAPSHOT'>, 'VERSION AS OF': <TokenType.VERSION_SNAPSHOT: 'VERSION_SNAPSHOT'>, 'SERDEPROPERTIES': <TokenType.SERDE_PROPERTIES: 'SERDE_PROPERTIES'>}
NUMERIC_LITERALS =
{'L': 'BIGINT', 'S': 'SMALLINT', 'Y': 'TINYINT', 'D': 'DOUBLE', 'F': 'FLOAT', 'BD': 'DECIMAL'}
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIER_ESCAPES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- COMMENTS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
class Parser(parser.Parser):
    """Parser for the Hive dialect.

    Extends the base parser with Hive-specific function builders, the
    ``TRANSFORM(...)`` clause, ``SERDEPROPERTIES``, CHAR/VARCHAR handling
    outside of schema definitions, ``DISTRIBUTE BY`` / ``SORT BY`` and
    ``${...}`` style parameters.
    """

    # Behaviour switches read by the base parser (their semantics are defined there).
    LOG_DEFAULTS_TO_LN = True
    STRICT_CAST = False
    VALUES_FOLLOWED_BY_PAREN = False

    # Function name -> builder that turns the parsed argument list into an expression.
    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "BASE64": exp.ToBase64.from_arg_list,
        "COLLECT_LIST": exp.ArrayAgg.from_arg_list,
        "COLLECT_SET": exp.ArrayUniqueAgg.from_arg_list,
        # DATE_ADD / DATE_SUB are both built as TsOrDsAdd with a DAY unit;
        # DATE_SUB multiplies its increment by -1.
        "DATE_ADD": lambda args: exp.TsOrDsAdd(
            this=seq_get(args, 0), expression=seq_get(args, 1), unit=exp.Literal.string("DAY")
        ),
        "DATE_FORMAT": lambda args: build_formatted_time(exp.TimeToStr, "hive")(
            [
                exp.TimeStrToTime(this=seq_get(args, 0)),
                seq_get(args, 1),
            ]
        ),
        "DATE_SUB": lambda args: exp.TsOrDsAdd(
            this=seq_get(args, 0),
            expression=exp.Mul(this=seq_get(args, 1), expression=exp.Literal.number(-1)),
            unit=exp.Literal.string("DAY"),
        ),
        # Both DATEDIFF operands are wrapped in TsOrDsToDate.
        "DATEDIFF": lambda args: exp.DateDiff(
            this=exp.TsOrDsToDate(this=seq_get(args, 0)),
            expression=exp.TsOrDsToDate(this=seq_get(args, 1)),
        ),
        "DAY": lambda args: exp.Day(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
        "FIRST": _build_with_ignore_nulls(exp.First),
        "FIRST_VALUE": _build_with_ignore_nulls(exp.FirstValue),
        "FROM_UNIXTIME": build_formatted_time(exp.UnixToStr, "hive", True),
        "GET_JSON_OBJECT": exp.JSONExtractScalar.from_arg_list,
        "LAST": _build_with_ignore_nulls(exp.Last),
        "LAST_VALUE": _build_with_ignore_nulls(exp.LastValue),
        "LOCATE": locate_to_strposition,
        "MAP": parser.build_var_map,
        "MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate.from_arg_list(args)),
        "PERCENTILE": exp.Quantile.from_arg_list,
        "PERCENTILE_APPROX": exp.ApproxQuantile.from_arg_list,
        "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
            this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2)
        ),
        "SIZE": exp.ArraySize.from_arg_list,
        "SPLIT": exp.RegexpSplit.from_arg_list,
        # Missing delimiters fall back to "," (pairs) and ":" (key/value).
        "STR_TO_MAP": lambda args: exp.StrToMap(
            this=seq_get(args, 0),
            pair_delim=seq_get(args, 1) or exp.Literal.string(","),
            key_value_delim=seq_get(args, 2) or exp.Literal.string(":"),
        ),
        "TO_DATE": build_formatted_time(exp.TsOrDsToDate, "hive"),
        "TO_JSON": exp.JSONFormat.from_arg_list,
        "TRUNC": exp.TimestampTrunc.from_arg_list,
        "UNBASE64": exp.FromBase64.from_arg_list,
        # With no arguments, UNIX_TIMESTAMP() is built from CURRENT_TIMESTAMP.
        "UNIX_TIMESTAMP": lambda args: build_formatted_time(exp.StrToUnix, "hive", True)(
            args or [exp.CurrentTimestamp()]
        ),
        "YEAR": lambda args: exp.Year(this=exp.TsOrDsToDate.from_arg_list(args)),
    }

    NO_PAREN_FUNCTION_PARSERS = {
        **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
        "TRANSFORM": lambda self: self._parse_transform(),
    }

    PROPERTY_PARSERS = {
        **parser.Parser.PROPERTY_PARSERS,
        "SERDEPROPERTIES": lambda self: exp.SerdeProperties(
            expressions=self._parse_wrapped_csv(self._parse_property)
        ),
    }

    def _parse_transform(self) -> t.Optional[exp.Transform | exp.QueryTransform]:
        """Parse what follows a ``TRANSFORM`` keyword.

        Returns:
            ``None`` when no opening parenthesis follows (the parser is moved
            back by one token first), an ``exp.Transform`` when the argument
            list is not followed by ``USING``, and an ``exp.QueryTransform``
            otherwise.
        """
        if not self._match(TokenType.L_PAREN, advance=False):
            # Not a call: step back one token. Presumably this lets the caller
            # re-parse the keyword as something else -- confirm against the base parser.
            self._retreat(self._index - 1)
            return None

        transform_args = self._parse_wrapped_csv(self._parse_lambda)
        input_row_format = self._parse_row_format(match_row=True)
        writer = self._parse_string() if self._match_text_seq("RECORDWRITER") else None

        if not self._match(TokenType.USING):
            return exp.Transform.from_arg_list(transform_args)

        script = self._parse_string()

        # The alias token before the output schema is optional.
        self._match(TokenType.ALIAS)
        output_schema = self._parse_schema()

        output_row_format = self._parse_row_format(match_row=True)
        reader = self._parse_string() if self._match_text_seq("RECORDREADER") else None

        return self.expression(
            exp.QueryTransform,
            expressions=transform_args,
            command_script=script,
            schema=output_schema,
            row_format_before=input_row_format,
            record_writer=writer,
            row_format_after=output_row_format,
            record_reader=reader,
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """
        Parse a type, turning CHAR / VARCHAR into TEXT unless parsing a schema definition.

        Spark (and most likely Hive) treats casts to CHAR(length) and VARCHAR(length) as casts
        to STRING everywhere except in schema definitions. Example from Spark v3.4.0:

            spark-sql (default)> select cast(1234 as varchar(2));
            23/06/06 15:51:18 WARN CharVarcharUtils: The Spark cast operator does not support
            char/varchar type and simply treats them as string type. Please use string type
            directly to avoid confusion. Otherwise, you can set spark.sql.legacy.charVarcharAsString
            to true, so that Spark treat them as string type as same as Spark 3.0 and earlier

            1234
            Time taken: 4.265 seconds, Fetched 1 row(s)

        The value is not truncated to '12', unlike what other dialects (e.g. postgres) do, so
        the length has to be dropped for the query to transpile correctly.

        Reference: https://spark.apache.org/docs/latest/sql-ref-datatypes.html
        """
        parsed = super()._parse_types(
            check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
        )

        # Nothing was parsed, or we are inside a schema definition: leave as is.
        if not parsed or schema:
            return parsed

        def _char_to_text(node):
            # Swap CHAR / VARCHAR data types for a plain TEXT type; keep every other node.
            if isinstance(node, exp.DataType) and node.is_type("char", "varchar"):
                return node.replace(exp.DataType.build("text"))
            return node

        return parsed.transform(_char_to_text, copy=False)

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        """Parse an optional partition list followed by an optional ordering.

        Both the PARTITION_BY and DISTRIBUTE_BY tokens introduce the partition
        list, and a SORT_BY token is accepted in place of the regular order token.

        Returns:
            A ``(partition_expressions, order)`` tuple; the list is empty when
            no partitioning token is present.
        """
        partition_exprs: t.List[exp.Expression] = []
        if self._match_set({TokenType.PARTITION_BY, TokenType.DISTRIBUTE_BY}):
            partition_exprs = self._parse_csv(self._parse_assignment)

        order = super()._parse_order(skip_order_token=self._match(TokenType.SORT_BY))
        return partition_exprs, order

    def _parse_parameter(self) -> exp.Parameter:
        """Parse a parameter of the form ``{name}`` or ``{name:value}``; the braces are optional."""
        self._match(TokenType.L_BRACE)
        name = self._parse_identifier() or self._parse_primary_or_var()

        # Keeps the original short-circuit: the right-hand side is only parsed after a colon.
        has_colon = self._match(TokenType.COLON)
        value = has_colon and (self._parse_identifier() or self._parse_primary_or_var())

        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=name, expression=value)
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
FUNCTIONS =
{'ABS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Abs'>>, 'ADD_MONTHS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.AddMonths'>>, 'ANONYMOUS_AGG_FUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.AnonymousAggFunc'>>, 'ANY_VALUE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.AnyValue'>>, 'APPROX_DISTINCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxDistinct'>>, 'APPROX_COUNT_DISTINCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxDistinct'>>, 'APPROX_QUANTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxQuantile'>>, 'APPROX_TOP_K': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxTopK'>>, 'ARG_MAX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMax'>>, 'ARGMAX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMax'>>, 'MAX_BY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMax'>>, 'ARG_MIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMin'>>, 'ARGMIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMin'>>, 'MIN_BY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMin'>>, 'ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Array'>>, 'ARRAY_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAgg'>>, 'ARRAY_ALL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAll'>>, 'ARRAY_ANY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAny'>>, 'ARRAY_CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayConcat'>>, 'ARRAY_CAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayConcat'>>, 'ARRAY_CONSTRUCT_COMPACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayConstructCompact'>>, 'ARRAY_CONTAINS': <bound method Func.from_arg_list 
of <class 'sqlglot.expressions.ArrayContains'>>, 'ARRAY_HAS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayContains'>>, 'ARRAY_CONTAINS_ALL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayContainsAll'>>, 'ARRAY_HAS_ALL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayContainsAll'>>, 'FILTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayFilter'>>, 'ARRAY_FILTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayFilter'>>, 'ARRAY_OVERLAPS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayOverlaps'>>, 'ARRAY_SIZE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySize'>>, 'ARRAY_LENGTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySize'>>, 'ARRAY_SORT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySort'>>, 'ARRAY_SUM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySum'>>, 'ARRAY_TO_STRING': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayToString'>>, 'ARRAY_JOIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayToString'>>, 'ARRAY_UNION_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayUnionAgg'>>, 'ARRAY_UNIQUE_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayUniqueAgg'>>, 'AVG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Avg'>>, 'CASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Case'>>, 'CAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Cast'>>, 'CAST_TO_STR_TYPE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CastToStrType'>>, 'CBRT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Cbrt'>>, 'CEIL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ceil'>>, 'CEILING': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ceil'>>, 
'CHR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Chr'>>, 'CHAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Chr'>>, 'COALESCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'IFNULL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'NVL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'COLLATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Collate'>>, 'COMBINED_AGG_FUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CombinedAggFunc'>>, 'COMBINED_PARAMETERIZED_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CombinedParameterizedAgg'>>, 'CONCAT': <function Parser.<lambda>>, 'CONCAT_WS': <function Parser.<lambda>>, 'CONNECT_BY_ROOT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ConnectByRoot'>>, 'CONVERT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Convert'>>, 'CORR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Corr'>>, 'COUNT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Count'>>, 'COUNT_IF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CountIf'>>, 'COUNTIF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CountIf'>>, 'COVAR_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CovarPop'>>, 'COVAR_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CovarSamp'>>, 'CURRENT_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentDate'>>, 'CURRENT_DATETIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentDatetime'>>, 'CURRENT_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentTime'>>, 'CURRENT_TIMESTAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentTimestamp'>>, 'CURRENT_USER': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.CurrentUser'>>, 'DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Date'>>, 'DATE_ADD': <function Hive.Parser.<lambda>>, 'DATEDIFF': <function Hive.Parser.<lambda>>, 'DATE_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateDiff'>>, 'DATE_FROM_PARTS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateFromParts'>>, 'DATEFROMPARTS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateFromParts'>>, 'DATE_STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateStrToDate'>>, 'DATE_SUB': <function Hive.Parser.<lambda>>, 'DATE_TO_DATE_STR': <function Parser.<lambda>>, 'DATE_TO_DI': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateToDi'>>, 'DATE_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateTrunc'>>, 'DATETIME_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeAdd'>>, 'DATETIME_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeDiff'>>, 'DATETIME_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeSub'>>, 'DATETIME_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeTrunc'>>, 'DAY': <function Hive.Parser.<lambda>>, 'DAY_OF_MONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfMonth'>>, 'DAYOFMONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfMonth'>>, 'DAY_OF_WEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfWeek'>>, 'DAYOFWEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfWeek'>>, 'DAY_OF_YEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfYear'>>, 'DAYOFYEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfYear'>>, 'DECODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Decode'>>, 'DI_TO_DATE': <bound method Func.from_arg_list of 
<class 'sqlglot.expressions.DiToDate'>>, 'ENCODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Encode'>>, 'EXP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Exp'>>, 'EXPLODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Explode'>>, 'EXPLODE_OUTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ExplodeOuter'>>, 'EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Extract'>>, 'FIRST': <function _build_with_ignore_nulls.<locals>._parse>, 'FIRST_VALUE': <function _build_with_ignore_nulls.<locals>._parse>, 'FLATTEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Flatten'>>, 'FLOOR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Floor'>>, 'FROM_BASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.FromBase'>>, 'FROM_BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.FromBase64'>>, 'GAP_FILL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.GapFill'>>, 'GENERATE_DATE_ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.GenerateDateArray'>>, 'GENERATE_SERIES': <bound method Func.from_arg_list of <class 'sqlglot.expressions.GenerateSeries'>>, 'GREATEST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Greatest'>>, 'GROUP_CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.GroupConcat'>>, 'HEX': <function build_hex>, 'HLL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Hll'>>, 'IF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.If'>>, 'IIF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.If'>>, 'INITCAP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Initcap'>>, 'IS_INF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.IsInf'>>, 'ISINF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.IsInf'>>, 'IS_NAN': <bound method 
Func.from_arg_list of <class 'sqlglot.expressions.IsNan'>>, 'ISNAN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.IsNan'>>, 'J_S_O_N_ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONArray'>>, 'J_S_O_N_ARRAY_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONArrayAgg'>>, 'JSON_ARRAY_CONTAINS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONArrayContains'>>, 'JSONB_EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONBExtract'>>, 'JSONB_EXTRACT_SCALAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONBExtractScalar'>>, 'JSON_EXTRACT': <function build_extract_json_with_path.<locals>._builder>, 'JSON_EXTRACT_SCALAR': <function build_extract_json_with_path.<locals>._builder>, 'JSON_FORMAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONFormat'>>, 'J_S_O_N_OBJECT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONObject'>>, 'J_S_O_N_OBJECT_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONObjectAgg'>>, 'J_S_O_N_TABLE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONTable'>>, 'LAG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Lag'>>, 'LAST': <function _build_with_ignore_nulls.<locals>._parse>, 'LAST_DAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LastDay'>>, 'LAST_DAY_OF_MONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LastDay'>>, 'LAST_VALUE': <function _build_with_ignore_nulls.<locals>._parse>, 'LEAD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Lead'>>, 'LEAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Least'>>, 'LEFT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Left'>>, 'LENGTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Length'>>, 'LEN': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.Length'>>, 'LEVENSHTEIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Levenshtein'>>, 'LIST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.List'>>, 'LN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ln'>>, 'LOG': <function build_logarithm>, 'LOGICAL_AND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'BOOL_AND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'BOOLAND_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'LOGICAL_OR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'BOOL_OR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'BOOLOR_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'LOWER': <function build_lower>, 'LCASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Lower'>>, 'LOWER_HEX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LowerHex'>>, 'MD5': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MD5'>>, 'MD5_DIGEST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MD5Digest'>>, 'MAP': <function build_var_map>, 'MAP_FROM_ENTRIES': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MapFromEntries'>>, 'MATCH_AGAINST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MatchAgainst'>>, 'MAX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Max'>>, 'MIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Min'>>, 'MONTH': <function Hive.Parser.<lambda>>, 'MONTHS_BETWEEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MonthsBetween'>>, 'NEXT_VALUE_FOR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.NextValueFor'>>, 'NTH_VALUE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.NthValue'>>, 'NULLIF': <bound method 
Func.from_arg_list of <class 'sqlglot.expressions.Nullif'>>, 'NUMBER_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.NumberToStr'>>, 'NVL2': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Nvl2'>>, 'OPEN_J_S_O_N': <bound method Func.from_arg_list of <class 'sqlglot.expressions.OpenJSON'>>, 'PARAMETERIZED_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ParameterizedAgg'>>, 'PARSE_JSON': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ParseJSON'>>, 'JSON_PARSE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ParseJSON'>>, 'PERCENTILE_CONT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.PercentileCont'>>, 'PERCENTILE_DISC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.PercentileDisc'>>, 'POSEXPLODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Posexplode'>>, 'POSEXPLODE_OUTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.PosexplodeOuter'>>, 'POWER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Pow'>>, 'POW': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Pow'>>, 'PREDICT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Predict'>>, 'QUANTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Quantile'>>, 'QUARTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Quarter'>>, 'RAND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Rand'>>, 'RANDOM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Rand'>>, 'RANDN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Randn'>>, 'RANGE_N': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RangeN'>>, 'READ_CSV': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ReadCSV'>>, 'REDUCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Reduce'>>, 'REGEXP_EXTRACT': <function 
Hive.Parser.<lambda>>, 'REGEXP_I_LIKE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpILike'>>, 'REGEXP_LIKE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpLike'>>, 'REGEXP_REPLACE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpReplace'>>, 'REGEXP_SPLIT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpSplit'>>, 'REPEAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Repeat'>>, 'RIGHT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Right'>>, 'ROUND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Round'>>, 'ROW_NUMBER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RowNumber'>>, 'SHA': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SHA'>>, 'SHA1': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SHA'>>, 'SHA2': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SHA2'>>, 'SAFE_DIVIDE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SafeDivide'>>, 'SIGN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Sign'>>, 'SIGNUM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Sign'>>, 'SORT_ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SortArray'>>, 'SPLIT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpSplit'>>, 'SQRT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Sqrt'>>, 'STANDARD_HASH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StandardHash'>>, 'STAR_MAP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StarMap'>>, 'STARTS_WITH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StartsWith'>>, 'STARTSWITH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StartsWith'>>, 'STDDEV': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Stddev'>>, 'STDDEV_POP': <bound method 
Func.from_arg_list of <class 'sqlglot.expressions.StddevPop'>>, 'STDDEV_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StddevSamp'>>, 'STR_POSITION': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrPosition'>>, 'STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToDate'>>, 'STR_TO_MAP': <function Hive.Parser.<lambda>>, 'STR_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToTime'>>, 'STR_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToUnix'>>, 'STRING_TO_ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StringToArray'>>, 'SPLIT_BY_STRING': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StringToArray'>>, 'STRUCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Struct'>>, 'STRUCT_EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StructExtract'>>, 'STUFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Stuff'>>, 'INSERT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Stuff'>>, 'SUBSTRING': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Substring'>>, 'SUM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Sum'>>, 'TIME_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeAdd'>>, 'TIME_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeDiff'>>, 'TIME_FROM_PARTS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeFromParts'>>, 'TIMEFROMPARTS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeFromParts'>>, 'TIME_STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToDate'>>, 'TIME_STR_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToTime'>>, 'TIME_STR_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToUnix'>>, 'TIME_SUB': 
<bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeSub'>>, 'TIME_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeToStr'>>, 'TIME_TO_TIME_STR': <function Parser.<lambda>>, 'TIME_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeToUnix'>>, 'TIME_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeTrunc'>>, 'TIMESTAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Timestamp'>>, 'TIMESTAMP_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampAdd'>>, 'TIMESTAMPDIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampDiff'>>, 'TIMESTAMP_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampDiff'>>, 'TIMESTAMP_FROM_PARTS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampFromParts'>>, 'TIMESTAMPFROMPARTS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampFromParts'>>, 'TIMESTAMP_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampSub'>>, 'TIMESTAMP_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampTrunc'>>, 'TO_ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToArray'>>, 'TO_BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToBase64'>>, 'TO_CHAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToChar'>>, 'TO_DAYS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToDays'>>, 'TO_MAP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToMap'>>, 'TO_NUMBER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToNumber'>>, 'TRANSFORM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Transform'>>, 'TRIM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Trim'>>, 'TRY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Try'>>, 'TRY_CAST': 
<bound method Func.from_arg_list of <class 'sqlglot.expressions.TryCast'>>, 'TS_OR_DI_TO_DI': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDiToDi'>>, 'TS_OR_DS_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsAdd'>>, 'TS_OR_DS_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsDiff'>>, 'TS_OR_DS_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsToDate'>>, 'TS_OR_DS_TO_DATE_STR': <function Parser.<lambda>>, 'TS_OR_DS_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsToTime'>>, 'TS_OR_DS_TO_TIMESTAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsToTimestamp'>>, 'UNHEX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Unhex'>>, 'UNIX_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixDate'>>, 'UNIX_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToStr'>>, 'UNIX_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToTime'>>, 'UNIX_TO_TIME_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToTimeStr'>>, 'UPPER': <function build_upper>, 'UCASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Upper'>>, 'VAR_MAP': <function build_var_map>, 'VARIANCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VARIANCE_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VAR_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VARIANCE_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.VariancePop'>>, 'VAR_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.VariancePop'>>, 'WEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Week'>>, 'WEEK_OF_YEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.WeekOfYear'>>, 'WEEKOFYEAR': <bound 
method Func.from_arg_list of <class 'sqlglot.expressions.WeekOfYear'>>, 'WHEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.When'>>, 'X_M_L_TABLE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.XMLTable'>>, 'XOR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Xor'>>, 'YEAR': <function Hive.Parser.<lambda>>, 'GLOB': <function Parser.<lambda>>, 'JSON_EXTRACT_PATH_TEXT': <function build_extract_json_with_path.<locals>._builder>, 'LIKE': <function build_like>, 'LOG2': <function Parser.<lambda>>, 'LOG10': <function Parser.<lambda>>, 'MOD': <function build_mod>, 'TO_HEX': <function build_hex>, 'BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToBase64'>>, 'COLLECT_LIST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAgg'>>, 'COLLECT_SET': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayUniqueAgg'>>, 'DATE_FORMAT': <function Hive.Parser.<lambda>>, 'FROM_UNIXTIME': <function build_formatted_time.<locals>._builder>, 'GET_JSON_OBJECT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONExtractScalar'>>, 'LOCATE': <function locate_to_strposition>, 'PERCENTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Quantile'>>, 'PERCENTILE_APPROX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxQuantile'>>, 'SIZE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySize'>>, 'TO_DATE': <function build_formatted_time.<locals>._builder>, 'TO_JSON': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONFormat'>>, 'TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampTrunc'>>, 'UNBASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.FromBase64'>>, 'UNIX_TIMESTAMP': <function Hive.Parser.<lambda>>}
NO_PAREN_FUNCTION_PARSERS =
{'ANY': <function Parser.<lambda>>, 'CASE': <function Parser.<lambda>>, 'IF': <function Parser.<lambda>>, 'NEXT': <function Parser.<lambda>>, 'TRANSFORM': <function Hive.Parser.<lambda>>}
PROPERTY_PARSERS =
{'ALLOWED_VALUES': <function Parser.<lambda>>, 'ALGORITHM': <function Parser.<lambda>>, 'AUTO': <function Parser.<lambda>>, 'AUTO_INCREMENT': <function Parser.<lambda>>, 'BACKUP': <function Parser.<lambda>>, 'BLOCKCOMPRESSION': <function Parser.<lambda>>, 'CHARSET': <function Parser.<lambda>>, 'CHARACTER SET': <function Parser.<lambda>>, 'CHECKSUM': <function Parser.<lambda>>, 'CLUSTER BY': <function Parser.<lambda>>, 'CLUSTERED': <function Parser.<lambda>>, 'COLLATE': <function Parser.<lambda>>, 'COMMENT': <function Parser.<lambda>>, 'CONTAINS': <function Parser.<lambda>>, 'COPY': <function Parser.<lambda>>, 'DATABLOCKSIZE': <function Parser.<lambda>>, 'DATA_DELETION': <function Parser.<lambda>>, 'DEFINER': <function Parser.<lambda>>, 'DETERMINISTIC': <function Parser.<lambda>>, 'DISTKEY': <function Parser.<lambda>>, 'DISTSTYLE': <function Parser.<lambda>>, 'ENGINE': <function Parser.<lambda>>, 'EXECUTE': <function Parser.<lambda>>, 'EXTERNAL': <function Parser.<lambda>>, 'FALLBACK': <function Parser.<lambda>>, 'FORMAT': <function Parser.<lambda>>, 'FREESPACE': <function Parser.<lambda>>, 'GLOBAL': <function Parser.<lambda>>, 'HEAP': <function Parser.<lambda>>, 'ICEBERG': <function Parser.<lambda>>, 'IMMUTABLE': <function Parser.<lambda>>, 'INHERITS': <function Parser.<lambda>>, 'INPUT': <function Parser.<lambda>>, 'JOURNAL': <function Parser.<lambda>>, 'LANGUAGE': <function Parser.<lambda>>, 'LAYOUT': <function Parser.<lambda>>, 'LIFETIME': <function Parser.<lambda>>, 'LIKE': <function Parser.<lambda>>, 'LOCATION': <function Parser.<lambda>>, 'LOCK': <function Parser.<lambda>>, 'LOCKING': <function Parser.<lambda>>, 'LOG': <function Parser.<lambda>>, 'MATERIALIZED': <function Parser.<lambda>>, 'MERGEBLOCKRATIO': <function Parser.<lambda>>, 'MODIFIES': <function Parser.<lambda>>, 'MULTISET': <function Parser.<lambda>>, 'NO': <function Parser.<lambda>>, 'ON': <function Parser.<lambda>>, 'ORDER BY': <function Parser.<lambda>>, 'OUTPUT': <function Parser.<lambda>>, 
'PARTITION': <function Parser.<lambda>>, 'PARTITION BY': <function Parser.<lambda>>, 'PARTITIONED BY': <function Parser.<lambda>>, 'PARTITIONED_BY': <function Parser.<lambda>>, 'PRIMARY KEY': <function Parser.<lambda>>, 'RANGE': <function Parser.<lambda>>, 'READS': <function Parser.<lambda>>, 'REMOTE': <function Parser.<lambda>>, 'RETURNS': <function Parser.<lambda>>, 'STRICT': <function Parser.<lambda>>, 'ROW': <function Parser.<lambda>>, 'ROW_FORMAT': <function Parser.<lambda>>, 'SAMPLE': <function Parser.<lambda>>, 'SET': <function Parser.<lambda>>, 'SETTINGS': <function Parser.<lambda>>, 'SHARING': <function Parser.<lambda>>, 'SORTKEY': <function Parser.<lambda>>, 'SOURCE': <function Parser.<lambda>>, 'STABLE': <function Parser.<lambda>>, 'STORED': <function Parser.<lambda>>, 'SYSTEM_VERSIONING': <function Parser.<lambda>>, 'TBLPROPERTIES': <function Parser.<lambda>>, 'TEMP': <function Parser.<lambda>>, 'TEMPORARY': <function Parser.<lambda>>, 'TO': <function Parser.<lambda>>, 'TRANSIENT': <function Parser.<lambda>>, 'TRANSFORM': <function Parser.<lambda>>, 'TTL': <function Parser.<lambda>>, 'USING': <function Parser.<lambda>>, 'UNLOGGED': <function Parser.<lambda>>, 'VOLATILE': <function Parser.<lambda>>, 'WITH': <function Parser.<lambda>>, 'SERDEPROPERTIES': <function Hive.Parser.<lambda>>}
SET_TRIE: Dict =
{'GLOBAL': {0: True}, 'LOCAL': {0: True}, 'SESSION': {0: True}, 'TRANSACTION': {0: True}}
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ID_VAR_TOKENS
- INTERVAL_VARS
- TABLE_ALIAS_TOKENS
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- RANGE_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- FUNCTION_PARSERS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTER
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_UNION
- UNION_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_JSON_EXTRACT
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
# Hive SQL generator: specializes the base ``generator.Generator`` with
# Hive-specific settings, type names, per-expression renderers and a handful
# of overridden ``*_sql`` methods.
# NOTE: documented with ``#`` comments rather than a class docstring so the
# docstring inherited from the base generator is not overridden.
class Generator(generator.Generator):
    # Dialect-level settings read by the base generator. Their exact semantics
    # are defined in generator.Generator, which is not visible from this block.
    LIMIT_FETCH = "LIMIT"
    TABLESAMPLE_WITH_METHOD = False
    JOIN_HINTS = False
    TABLE_HINTS = False
    QUERY_HINTS = False
    INDEX_ON = "ON TABLE"
    EXTRACT_ALLOWS_QUOTES = False
    NVL2_SUPPORTED = False
    LAST_DAY_SUPPORTS_DATE_PART = False
    JSON_PATH_SINGLE_QUOTE_ESCAPE = True
    SUPPORTS_TO_NUMBER = False
    WITH_PROPERTIES_PREFIX = "TBLPROPERTIES"

    # Expression classes listed for special CTE handling by the base generator
    # (presumably nested CTEs are hoisted out of these -- confirm in base class).
    EXPRESSIONS_WITHOUT_NESTED_CTES = {
        exp.Insert,
        exp.Select,
        exp.Subquery,
        exp.Union,
    }

    # JSON path node classes this generator declares as supported.
    SUPPORTED_JSON_PATH_PARTS = {
        exp.JSONPathKey,
        exp.JSONPathRoot,
        exp.JSONPathSubscript,
        exp.JSONPathWildcard,
    }

    # Base type-name mapping extended with Hive spellings for selected types.
    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.BIT: "BOOLEAN",
        exp.DataType.Type.DATETIME: "TIMESTAMP",
        exp.DataType.Type.ROWVERSION: "BINARY",
        exp.DataType.Type.TEXT: "STRING",
        exp.DataType.Type.TIME: "TIMESTAMP",
        exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
        exp.DataType.Type.UTINYINT: "SMALLINT",
        exp.DataType.Type.VARBINARY: "BINARY",
    }

    # Expression class -> callable ``(generator, expression) -> str``.
    # Entries placed after the ``**`` unpacking replace base entries that share
    # the same key.
    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.Group: transforms.preprocess([transforms.unalias_group]),
        exp.Select: transforms.preprocess(
            [
                transforms.eliminate_qualify,
                transforms.eliminate_distinct_on,
                transforms.unnest_to_explode,
            ]
        ),
        exp.Property: _property_sql,
        exp.AnyValue: rename_func("FIRST"),
        exp.ApproxDistinct: approx_count_distinct_sql,
        exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
        exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
        exp.ArrayConcat: rename_func("CONCAT"),
        # CONCAT_WS takes the separator first, hence the swapped arguments.
        exp.ArrayToString: lambda self, e: self.func("CONCAT_WS", e.expression, e.this),
        exp.ArraySize: rename_func("SIZE"),
        exp.ArraySort: _array_sort_sql,
        exp.With: no_recursive_cte_sql,
        exp.DateAdd: _add_date_sql,
        exp.DateDiff: _date_diff_sql,
        exp.DateStrToDate: datestrtodate_sql,
        exp.DateSub: _add_date_sql,
        exp.DateToDi: lambda self, e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Hive.DATEINT_FORMAT}) AS INT)",
        exp.DiToDate: lambda self, e: f"TO_DATE(CAST({self.sql(e, 'this')} AS STRING), {Hive.DATEINT_FORMAT})",
        exp.FileFormatProperty: lambda self, e: f"STORED AS {self.sql(e, 'this') if isinstance(e.this, exp.InputOutputFormat) else e.name.upper()}",
        exp.FromBase64: rename_func("UNBASE64"),
        exp.If: if_sql(),
        exp.ILike: no_ilike_sql,
        exp.IsNan: rename_func("ISNAN"),
        exp.JSONExtract: lambda self, e: self.func("GET_JSON_OBJECT", e.this, e.expression),
        exp.JSONExtractScalar: lambda self, e: self.func(
            "GET_JSON_OBJECT", e.this, e.expression
        ),
        exp.JSONFormat: _json_format_sql,
        exp.Left: left_to_substring_sql,
        exp.Map: var_map_sql,
        exp.Max: max_or_greatest,
        exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
        exp.Min: min_or_least,
        exp.MonthsBetween: lambda self, e: self.func("MONTHS_BETWEEN", e.this, e.expression),
        # Emits nothing when the constraint carries a truthy "allow_null" arg.
        exp.NotNullColumnConstraint: lambda _, e: (
            "" if e.args.get("allow_null") else "NOT NULL"
        ),
        exp.VarMap: var_map_sql,
        exp.Create: preprocess(
            [
                remove_unique_constraints,
                ctas_with_tmp_tables_to_create_tmp_view,
                move_schema_columns_to_partitioned_by,
            ]
        ),
        exp.Quantile: rename_func("PERCENTILE"),
        exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"),
        exp.RegexpExtract: regexp_extract_sql,
        exp.RegexpReplace: regexp_replace_sql,
        exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"),
        exp.RegexpSplit: rename_func("SPLIT"),
        exp.Right: right_to_substring_sql,
        exp.SafeDivide: no_safe_divide_sql,
        exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
        exp.ArrayUniqueAgg: rename_func("COLLECT_SET"),
        # The delimiter is prefixed with the literal '\\Q' via CONCAT
        # (presumably so SPLIT treats it as a quoted regex literal -- confirm).
        exp.Split: lambda self, e: self.func(
            "SPLIT", e.this, self.func("CONCAT", "'\\\\Q'", e.expression)
        ),
        exp.StrPosition: strposition_to_locate_sql,
        exp.StrToDate: _str_to_date_sql,
        exp.StrToTime: _str_to_time_sql,
        exp.StrToUnix: _str_to_unix_sql,
        exp.StructExtract: struct_extract_sql,
        exp.TimeStrToDate: rename_func("TO_DATE"),
        exp.TimeStrToTime: timestrtotime_sql,
        exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"),
        exp.TimestampTrunc: lambda self, e: self.func("TRUNC", e.this, unit_to_str(e)),
        exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
        exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"),
        exp.ToBase64: rename_func("BASE64"),
        exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS STRING), '-', ''), 1, 8) AS INT)",
        exp.TsOrDsAdd: _add_date_sql,
        exp.TsOrDsDiff: _date_diff_sql,
        exp.TsOrDsToDate: _to_date_sql,
        exp.TryCast: no_trycast_sql,
        exp.UnixToStr: lambda self, e: self.func(
            "FROM_UNIXTIME", e.this, time_format("hive")(self, e)
        ),
        exp.UnixToTime: _unix_to_time_sql,
        exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"),
        exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'this')}",
        exp.NumberToStr: rename_func("FORMAT_NUMBER"),
        exp.National: lambda self, e: self.national_sql(e, prefix=""),
        exp.ClusteredColumnConstraint: lambda self, e: f"({self.expressions(e, 'this', indent=False)})",
        exp.NonClusteredColumnConstraint: lambda self, e: f"({self.expressions(e, 'this', indent=False)})",
        # These constructs are rendered as an empty string.
        exp.NotForReplicationColumnConstraint: lambda *_: "",
        exp.OnProperty: lambda *_: "",
        exp.PrimaryKeyColumnConstraint: lambda *_: "PRIMARY KEY",
        # Only the wrapped argument is rendered; no function call is emitted.
        exp.ParseJSON: lambda self, e: self.sql(e.this),
        exp.WeekOfYear: rename_func("WEEKOFYEAR"),
        exp.DayOfMonth: rename_func("DAYOFMONTH"),
        exp.DayOfWeek: rename_func("DAYOFWEEK"),
    }

    # Base property placement extended/overridden for Hive.
    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.FileFormatProperty: exp.Properties.Location.POST_SCHEMA,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        exp.WithDataProperty: exp.Properties.Location.UNSUPPORTED,
    }

    def _jsonpathkey_sql(self, expression: exp.JSONPathKey) -> str:
        """Render a JSON path key, rejecting wildcard keys.

        A key whose ``this`` is a ``JSONPathWildcard`` is reported through
        ``self.unsupported`` and rendered as an empty string; any other key is
        delegated to the base implementation.
        """
        if isinstance(expression.this, exp.JSONPathWildcard):
            self.unsupported("Unsupported wildcard in JSONPathKey expression")
            return ""

        return super()._jsonpathkey_sql(expression)

    def parameter_sql(self, expression: exp.Parameter) -> str:
        """Render a parameter as ``${this}`` or ``${this:expression}``.

        When the parameter is the direct child of an ``EQ`` that itself sits
        directly under a ``SetItem``, the bare text is returned without the
        ``${...}`` wrapper.
        """
        this = self.sql(expression, "this")
        expression_sql = self.sql(expression, "expression")

        parent = expression.parent
        this = f"{this}:{expression_sql}" if expression_sql else this

        if isinstance(parent, exp.EQ) and isinstance(parent.parent, exp.SetItem):
            # We need to produce SET key = value instead of SET ${key} = value
            return this

        return f"${{{this}}}"

    def schema_sql(self, expression: exp.Schema) -> str:
        """Render a schema after clearing explicit ``desc=False`` flags.

        Every ``Ordered`` node under the schema whose ``desc`` arg is exactly
        ``False`` has it reset to ``None`` (the expression is mutated in place)
        before the base implementation renders the schema.
        """
        for ordered in expression.find_all(exp.Ordered):
            if ordered.args.get("desc") is False:
                ordered.set("desc", None)

        return super().schema_sql(expression)

    def constraint_sql(self, expression: exp.Constraint) -> str:
        """Render ``CONSTRAINT <this> <expressions>`` without any properties.

        All ``Properties`` nodes found under the constraint are removed from
        the tree first (the expression is mutated in place).
        """
        # Materialize the matches before popping so the tree is not modified
        # while it is still being traversed.
        for prop in list(expression.find_all(exp.Properties)):
            prop.pop()

        this = self.sql(expression, "this")
        expressions = self.expressions(expression, sep=" ", flat=True)
        return f"CONSTRAINT {this} {expressions}"

    def rowformatserdeproperty_sql(self, expression: exp.RowFormatSerdeProperty) -> str:
        """Render ``ROW FORMAT SERDE <this>`` followed by optional serde properties."""
        serde_props = self.sql(expression, "serde_properties")
        serde_props = f" {serde_props}" if serde_props else ""
        return f"ROW FORMAT SERDE {self.sql(expression, 'this')}{serde_props}"

    def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
        """Render an array aggregation as ``COLLECT_LIST(<arg>)``.

        If the argument is an ``Order`` node only its inner ``this`` is passed
        on, so the ordering itself is not emitted.
        """
        return self.func(
            "COLLECT_LIST",
            expression.this.this if isinstance(expression.this, exp.Order) else expression.this,
        )

    def datatype_sql(self, expression: exp.DataType) -> str:
        """Normalize a data type and render it with the base implementation.

        Rewrites applied, first match wins:
        - a type in ``PARAMETERIZABLE_TEXT_TYPES`` with no parameters, or whose
          first parameter is named ``MAX``, becomes a fresh ``text`` type;
        - a TEXT type that has parameters is changed in place to VARCHAR;
        - a type in ``exp.DataType.TEMPORAL_TYPES`` is rebuilt from its bare
          type, which drops the original node's other args;
        - a ``float`` with a size parameter becomes ``float`` when the size is
          at most 32, otherwise ``double``.
        """
        if expression.this in self.PARAMETERIZABLE_TEXT_TYPES and (
            not expression.expressions or expression.expressions[0].name == "MAX"
        ):
            expression = exp.DataType.build("text")
        elif expression.is_type(exp.DataType.Type.TEXT) and expression.expressions:
            expression.set("this", exp.DataType.Type.VARCHAR)
        elif expression.this in exp.DataType.TEMPORAL_TYPES:
            expression = exp.DataType.build(expression.this)
        elif expression.is_type("float"):
            size_expression = expression.find(exp.DataTypeParam)
            if size_expression:
                size = int(size_expression.name)
                expression = (
                    exp.DataType.build("float") if size <= 32 else exp.DataType.build("double")
                )

        return super().datatype_sql(expression)

    def version_sql(self, expression: exp.Version) -> str:
        """Render a version clause, removing the first ``"FOR "`` from the base output."""
        sql = super().version_sql(expression)
        return sql.replace("FOR ", "", 1)

    def struct_sql(self, expression: exp.Struct) -> str:
        """Render a struct as a positional ``STRUCT(...)`` call.

        For each ``PropertyEQ`` entry an unsupported message is recorded and
        only the entry's value (``.expression``) is kept; other entries are
        passed through unchanged.
        """
        values = []

        # The position of each entry is irrelevant, so iterate directly
        # instead of using enumerate() with an unused index.
        for e in expression.expressions:
            if isinstance(e, exp.PropertyEQ):
                self.unsupported("Hive does not support named structs.")
                values.append(e.expression)
            else:
                values.append(e)

        return self.func("STRUCT", *values)

    def alterset_sql(self, expression: exp.AlterSet) -> str:
        """Render the ``SET ...`` part of an ALTER statement.

        Optional pieces are appended in this order when present: ``SERDE``,
        the plain expressions, ``LOCATION``, ``FILEFORMAT`` and ``TAGS``.
        """
        exprs = self.expressions(expression, flat=True)
        exprs = f" {exprs}" if exprs else ""
        location = self.sql(expression, "location")
        location = f" LOCATION {location}" if location else ""
        file_format = self.expressions(expression, key="file_format", flat=True, sep=" ")
        file_format = f" FILEFORMAT {file_format}" if file_format else ""
        serde = self.sql(expression, "serde")
        serde = f" SERDE {serde}" if serde else ""
        tags = self.expressions(expression, key="tag", flat=True, sep="")
        tags = f" TAGS {tags}" if tags else ""

        return f"SET{serde}{exprs}{location}{file_format}{tags}"

    def serdeproperties_sql(self, expression: exp.SerdeProperties) -> str:
        """Render ``SERDEPROPERTIES (...)``, prefixed by ``WITH `` when the ``with`` arg is set."""
        prefix = "WITH " if expression.args.get("with") else ""
        exprs = self.expressions(expression, flat=True)

        return f"{prefix}SERDEPROPERTIES ({exprs})"
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a `WHERE` clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
EXPRESSIONS_WITHOUT_NESTED_CTES =
{<class 'sqlglot.expressions.Select'>, <class 'sqlglot.expressions.Insert'>, <class 'sqlglot.expressions.Union'>, <class 'sqlglot.expressions.Subquery'>}
SUPPORTED_JSON_PATH_PARTS =
{<class 'sqlglot.expressions.JSONPathKey'>, <class 'sqlglot.expressions.JSONPathRoot'>, <class 'sqlglot.expressions.JSONPathWildcard'>, <class 'sqlglot.expressions.JSONPathSubscript'>}
TYPE_MAPPING =
{<Type.NCHAR: 'NCHAR'>: 'CHAR', <Type.NVARCHAR: 'NVARCHAR'>: 'VARCHAR', <Type.MEDIUMTEXT: 'MEDIUMTEXT'>: 'TEXT', <Type.LONGTEXT: 'LONGTEXT'>: 'TEXT', <Type.TINYTEXT: 'TINYTEXT'>: 'TEXT', <Type.MEDIUMBLOB: 'MEDIUMBLOB'>: 'BLOB', <Type.LONGBLOB: 'LONGBLOB'>: 'BLOB', <Type.TINYBLOB: 'TINYBLOB'>: 'BLOB', <Type.INET: 'INET'>: 'INET', <Type.ROWVERSION: 'ROWVERSION'>: 'BINARY', <Type.BIT: 'BIT'>: 'BOOLEAN', <Type.DATETIME: 'DATETIME'>: 'TIMESTAMP', <Type.TEXT: 'TEXT'>: 'STRING', <Type.TIME: 'TIME'>: 'TIMESTAMP', <Type.TIMESTAMPTZ: 'TIMESTAMPTZ'>: 'TIMESTAMP', <Type.UTINYINT: 'UTINYINT'>: 'SMALLINT', <Type.VARBINARY: 'VARBINARY'>: 'BINARY'}
TRANSFORMS =
{<class 'sqlglot.expressions.JSONPathKey'>: <function <lambda>>, <class 'sqlglot.expressions.JSONPathRoot'>: <function <lambda>>, <class 'sqlglot.expressions.JSONPathSubscript'>: <function <lambda>>, <class 'sqlglot.expressions.JSONPathWildcard'>: <function <lambda>>, <class 'sqlglot.expressions.AllowedValuesProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.AutoRefreshProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.BackupProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CaseSpecificColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CharacterSetColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CharacterSetProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ClusteredColumnConstraint'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.CollateColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CommentColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CopyGrantsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.DateFormatColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.DefaultColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.EncodeColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.EphemeralColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ExcludeColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ExecuteAsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ExternalProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.GlobalProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.HeapProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.IcebergProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.InheritsProperty'>: 
<function Generator.<lambda>>, <class 'sqlglot.expressions.InlineLengthColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.InputModelProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.IntervalSpan'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.JSONExtract'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.JSONExtractScalar'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.LanguageProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.LocationProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.LogProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.MaterializedProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.NonClusteredColumnConstraint'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.NoPrimaryIndexProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.NotForReplicationColumnConstraint'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.OnCommitProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.OnProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.OnUpdateColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.OutputModelProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.PathColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ProjectionPolicyColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.RemoteWithConnectionModelProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ReturnsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SampleProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SetConfigProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SetProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SettingsProperty'>: 
<function Generator.<lambda>>, <class 'sqlglot.expressions.SharingProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SqlReadWriteProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SqlSecurityProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.StabilityProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.StrictProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TemporaryProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TagColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TitleColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.Timestamp'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ToMap'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ToTableProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TransformModelProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TransientProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.UppercaseColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.UnloggedProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.VarMap'>: <function var_map_sql>, <class 'sqlglot.expressions.ViewAttributeProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.VolatileProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.WithJournalTableProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.WithOperator'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.Group'>: <function preprocess.<locals>._to_sql>, <class 'sqlglot.expressions.Select'>: <function preprocess.<locals>._to_sql>, <class 'sqlglot.expressions.Property'>: <function _property_sql>, <class 'sqlglot.expressions.AnyValue'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ApproxDistinct'>: <function 
approx_count_distinct_sql>, <class 'sqlglot.expressions.ArgMax'>: <function arg_max_or_min_no_count.<locals>._arg_max_or_min_sql>, <class 'sqlglot.expressions.ArgMin'>: <function arg_max_or_min_no_count.<locals>._arg_max_or_min_sql>, <class 'sqlglot.expressions.ArrayConcat'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ArrayToString'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.ArraySize'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ArraySort'>: <function _array_sort_sql>, <class 'sqlglot.expressions.With'>: <function no_recursive_cte_sql>, <class 'sqlglot.expressions.DateAdd'>: <function _add_date_sql>, <class 'sqlglot.expressions.DateDiff'>: <function _date_diff_sql>, <class 'sqlglot.expressions.DateStrToDate'>: <function datestrtodate_sql>, <class 'sqlglot.expressions.DateSub'>: <function _add_date_sql>, <class 'sqlglot.expressions.DateToDi'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.DiToDate'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.FileFormatProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.FromBase64'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.If'>: <function if_sql.<locals>._if_sql>, <class 'sqlglot.expressions.ILike'>: <function no_ilike_sql>, <class 'sqlglot.expressions.IsNan'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.JSONFormat'>: <function _json_format_sql>, <class 'sqlglot.expressions.Left'>: <function left_to_substring_sql>, <class 'sqlglot.expressions.Map'>: <function var_map_sql>, <class 'sqlglot.expressions.Max'>: <function max_or_greatest>, <class 'sqlglot.expressions.MD5Digest'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.Min'>: <function min_or_least>, <class 'sqlglot.expressions.MonthsBetween'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.NotNullColumnConstraint'>: <function Hive.Generator.<lambda>>, 
<class 'sqlglot.expressions.Create'>: <function preprocess.<locals>._to_sql>, <class 'sqlglot.expressions.Quantile'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ApproxQuantile'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.RegexpExtract'>: <function regexp_extract_sql>, <class 'sqlglot.expressions.RegexpReplace'>: <function regexp_replace_sql>, <class 'sqlglot.expressions.RegexpLike'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.RegexpSplit'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.Right'>: <function right_to_substring_sql>, <class 'sqlglot.expressions.SafeDivide'>: <function no_safe_divide_sql>, <class 'sqlglot.expressions.SchemaCommentProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.ArrayUniqueAgg'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.Split'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.StrPosition'>: <function strposition_to_locate_sql>, <class 'sqlglot.expressions.StrToDate'>: <function _str_to_date_sql>, <class 'sqlglot.expressions.StrToTime'>: <function _str_to_time_sql>, <class 'sqlglot.expressions.StrToUnix'>: <function _str_to_unix_sql>, <class 'sqlglot.expressions.StructExtract'>: <function struct_extract_sql>, <class 'sqlglot.expressions.TimeStrToDate'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TimeStrToTime'>: <function timestrtotime_sql>, <class 'sqlglot.expressions.TimeStrToUnix'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TimestampTrunc'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.TimeToStr'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.TimeToUnix'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ToBase64'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TsOrDiToDi'>: <function Hive.Generator.<lambda>>, <class 
'sqlglot.expressions.TsOrDsAdd'>: <function _add_date_sql>, <class 'sqlglot.expressions.TsOrDsDiff'>: <function _date_diff_sql>, <class 'sqlglot.expressions.TsOrDsToDate'>: <function _to_date_sql>, <class 'sqlglot.expressions.TryCast'>: <function no_trycast_sql>, <class 'sqlglot.expressions.UnixToStr'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.UnixToTime'>: <function _unix_to_time_sql>, <class 'sqlglot.expressions.UnixToTimeStr'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.PartitionedByProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.NumberToStr'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.National'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.PrimaryKeyColumnConstraint'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.ParseJSON'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.WeekOfYear'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.DayOfMonth'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.DayOfWeek'>: <function rename_func.<locals>.<lambda>>}
PROPERTIES_LOCATION =
{<class 'sqlglot.expressions.AllowedValuesProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.AlgorithmProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.AutoIncrementProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.AutoRefreshProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.BackupProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.BlockCompressionProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.CharacterSetProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ChecksumProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.CollateProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.CopyGrantsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.Cluster'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ClusteredByProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DataBlocksizeProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.DataDeletionProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DefinerProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.DictRange'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DictProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DistKeyProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DistStyleProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.EngineProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ExecuteAsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ExternalProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.FallbackProperty'>: <Location.POST_NAME: 
'POST_NAME'>, <class 'sqlglot.expressions.FileFormatProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.FreespaceProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.GlobalProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.HeapProperty'>: <Location.POST_WITH: 'POST_WITH'>, <class 'sqlglot.expressions.InheritsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.IcebergProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.InputModelProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.IsolatedLoadingProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.JournalProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.LanguageProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LikeProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LocationProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LockProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LockingProperty'>: <Location.POST_ALIAS: 'POST_ALIAS'>, <class 'sqlglot.expressions.LogProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.MaterializedProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.MergeBlockRatioProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.NoPrimaryIndexProperty'>: <Location.POST_EXPRESSION: 'POST_EXPRESSION'>, <class 'sqlglot.expressions.OnProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.OnCommitProperty'>: <Location.POST_EXPRESSION: 'POST_EXPRESSION'>, <class 'sqlglot.expressions.Order'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.OutputModelProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.PartitionedByProperty'>: <Location.POST_SCHEMA: 
'POST_SCHEMA'>, <class 'sqlglot.expressions.PartitionedOfProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.PrimaryKey'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.Property'>: <Location.POST_WITH: 'POST_WITH'>, <class 'sqlglot.expressions.RemoteWithConnectionModelProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ReturnsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.RowFormatProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.RowFormatDelimitedProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.RowFormatSerdeProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SampleProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SchemaCommentProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SerdeProperties'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.Set'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SettingsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SetProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.SetConfigProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SharingProperty'>: <Location.POST_EXPRESSION: 'POST_EXPRESSION'>, <class 'sqlglot.expressions.SequenceProperties'>: <Location.POST_EXPRESSION: 'POST_EXPRESSION'>, <class 'sqlglot.expressions.SortKeyProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SqlReadWriteProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SqlSecurityProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.StabilityProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.StrictProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 
'sqlglot.expressions.TemporaryProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.ToTableProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.TransientProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.TransformModelProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.MergeTreeTTL'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.UnloggedProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.ViewAttributeProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.VolatileProperty'>: <Location.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.WithDataProperty'>: <Location.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.WithJournalTableProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.WithSystemVersioningProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>}
def parameter_sql(self, expression: exp.Parameter) -> str:
    """Render a parameter reference.

    The rendered text is the node's ``this`` part, followed by ``:`` and the
    ``expression`` part when the latter renders to a non-empty string. The
    result is wrapped as ``${...}`` unless the parameter sits directly under
    an ``exp.EQ`` whose own parent is an ``exp.SetItem``; in that case the
    bare text is returned.
    """
    name = self.sql(expression, "this")
    qualifier = self.sql(expression, "expression")
    if qualifier:
        name = f"{name}:{qualifier}"

    owner = expression.parent
    inside_set_assignment = isinstance(owner, exp.EQ) and isinstance(
        owner.parent, exp.SetItem
    )

    # We need to produce SET key = value instead of SET ${key} = value
    return name if inside_set_assignment else f"${{{name}}}"
def
rowformatserdeproperty_sql(self, expression: sqlglot.expressions.RowFormatSerdeProperty) -> str:
def datatype_sql(self, expression: exp.DataType) -> str:
    """Normalize a data type node, then delegate rendering to the parent class.

    The first matching rule wins:

    * a type in ``PARAMETERIZABLE_TEXT_TYPES`` with no parameters, or whose
      first parameter is named ``MAX``, is replaced by a fresh ``text`` type;
    * a TEXT type that carries parameters has its ``this`` set to VARCHAR
      (the incoming node is modified in place);
    * a type in ``exp.DataType.TEMPORAL_TYPES`` is rebuilt from its ``this``
      alone;
    * a ``float`` type with a ``DataTypeParam`` becomes ``float`` when that
      parameter is <= 32 and ``double`` otherwise.
    """
    data_type = exp.DataType
    kind = expression.this

    # Equivalent to: no parameters at all, or the first parameter is MAX.
    is_unsized_text = kind in self.PARAMETERIZABLE_TEXT_TYPES and not (
        expression.expressions and expression.expressions[0].name != "MAX"
    )

    if is_unsized_text:
        expression = data_type.build("text")
    elif expression.is_type(data_type.Type.TEXT) and expression.expressions:
        expression.set("this", data_type.Type.VARCHAR)
    elif kind in data_type.TEMPORAL_TYPES:
        expression = data_type.build(kind)
    elif expression.is_type("float"):
        precision = expression.find(exp.DataTypeParam)
        if precision:
            target = "float" if int(precision.name) <= 32 else "double"
            expression = data_type.build(target)

    return super().datatype_sql(expression)
def struct_sql(self, expression: exp.Struct) -> str:
    """Render a struct as a ``STRUCT(...)`` function call.

    Elements that are ``exp.PropertyEQ`` nodes are reported through
    ``self.unsupported`` and contribute only their ``expression`` part;
    every other element is passed through unchanged.
    """
    args = []

    for item in expression.expressions:
        if not isinstance(item, exp.PropertyEQ):
            args.append(item)
            continue

        self.unsupported("Hive does not support named structs.")
        args.append(item.expression)

    return self.func("STRUCT", *args)
def alterset_sql(self, expression: exp.AlterSet) -> str:
    """Render an ``exp.AlterSet`` node as a ``SET ...`` clause.

    Each part is emitted only when it renders to a non-empty string, in the
    order: SERDE, the node's plain expressions, LOCATION, FILEFORMAT, TAGS.
    """

    def clause(prefix: str, rendered: str) -> str:
        # Attach the keyword prefix only when there is something to show.
        return f"{prefix}{rendered}" if rendered else ""

    # The render calls stay in their original order; only the final
    # assembly places SERDE first.
    options = clause(" ", self.expressions(expression, flat=True))
    location = clause(" LOCATION ", self.sql(expression, "location"))
    file_format = clause(
        " FILEFORMAT ",
        self.expressions(expression, key="file_format", flat=True, sep=" "),
    )
    serde = clause(" SERDE ", self.sql(expression, "serde"))
    tags = clause(
        " TAGS ",
        self.expressions(expression, key="tag", flat=True, sep=""),
    )

    return "".join(("SET", serde, options, location, file_format, tags))
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- EXPLICIT_UNION
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_ONLY_LITERALS
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- COLUMN_JOIN_MARKS_SUPPORTED
- TZ_TO_WITH_TIME_ZONE
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_SEED_KEYWORD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- CAN_IMPLEMENT_ARRAY_ANY
- OUTER_UNION_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_EXCEPT
- HEX_FUNC
- TIME_PART_SINGULARS
- AFTER_HAVING_MODIFIER_TRANSFORMS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- NAMED_PLACEHOLDER_TOKEN
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- except_op
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- intersect_op
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_columns_sql
- star_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- set_operations
- union_sql
- union_op
- unnest_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- currenttimestamp_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- renametable_sql
- renamecolumn_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- trycast_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- generateseries_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql