sqlglot.dialects.hive
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    DATE_ADD_OR_SUB,
    Dialect,
    NormalizationStrategy,
    approx_count_distinct_sql,
    arg_max_or_min_no_count,
    create_with_partitions_sql,
    datestrtodate_sql,
    format_time_lambda,
    if_sql,
    is_parse_json,
    left_to_substring_sql,
    locate_to_strposition,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    no_recursive_cte_sql,
    no_safe_divide_sql,
    no_trycast_sql,
    regexp_extract_sql,
    regexp_replace_sql,
    rename_func,
    right_to_substring_sql,
    strposition_to_locate_sql,
    struct_extract_sql,
    time_format,
    timestrtotime_sql,
    var_map_sql,
)
from sqlglot.helper import seq_get
from sqlglot.parser import parse_var_map
from sqlglot.tokens import TokenType

# (FuncType, Multiplier)
DATE_DELTA_INTERVAL = {
    "YEAR": ("ADD_MONTHS", 12),
    "MONTH": ("ADD_MONTHS", 1),
    "QUARTER": ("ADD_MONTHS", 3),
    "WEEK": ("DATE_ADD", 7),
    "DAY": ("DATE_ADD", 1),
}

TIME_DIFF_FACTOR = {
    "MILLISECOND": " * 1000",
    "SECOND": "",
    "MINUTE": " / 60",
    "HOUR": " / 3600",
}

DIFF_MONTH_SWITCH = ("YEAR", "QUARTER", "MONTH")


def _create_sql(self, expression: exp.Create) -> str:
    # remove UNIQUE column constraints
    for constraint in expression.find_all(exp.UniqueColumnConstraint):
        if constraint.parent:
            constraint.parent.pop()

    properties = expression.args.get("properties")
    temporary = any(
        isinstance(prop, exp.TemporaryProperty)
        for prop in (properties.expressions if properties else [])
    )

    # CTAS with temp tables map to CREATE TEMPORARY VIEW
    kind = expression.args["kind"]
    if kind.upper() == "TABLE" and temporary:
        if expression.expression:
            return f"CREATE TEMPORARY VIEW {self.sql(expression, 'this')} AS {self.sql(expression, 'expression')}"
        else:
            # CREATE TEMPORARY TABLE may require storage provider
            expression = self.temporary_storage_provider(expression)

    return create_with_partitions_sql(self, expression)


def _add_date_sql(self: Hive.Generator, expression: DATE_ADD_OR_SUB) -> str:
    if isinstance(expression, exp.TsOrDsAdd) and not expression.unit:
        return self.func("DATE_ADD", expression.this, expression.expression)

    unit = expression.text("unit").upper()
    func, multiplier = DATE_DELTA_INTERVAL.get(unit, ("DATE_ADD", 1))

    if isinstance(expression, exp.DateSub):
        multiplier *= -1

    if expression.expression.is_number:
        modified_increment = exp.Literal.number(int(expression.text("expression")) * multiplier)
    else:
        modified_increment = expression.expression
        if multiplier != 1:
            modified_increment = exp.Mul(  # type: ignore
                this=modified_increment, expression=exp.Literal.number(multiplier)
            )

    return self.func(func, expression.this, modified_increment)


def _date_diff_sql(self: Hive.Generator, expression: exp.DateDiff | exp.TsOrDsDiff) -> str:
    unit = expression.text("unit").upper()

    factor = TIME_DIFF_FACTOR.get(unit)
    if factor is not None:
        left = self.sql(expression, "this")
        right = self.sql(expression, "expression")
        sec_diff = f"UNIX_TIMESTAMP({left}) - UNIX_TIMESTAMP({right})"
        return f"({sec_diff}){factor}" if factor else sec_diff

    months_between = unit in DIFF_MONTH_SWITCH
    sql_func = "MONTHS_BETWEEN" if months_between else "DATEDIFF"
    _, multiplier = DATE_DELTA_INTERVAL.get(unit, ("", 1))
    multiplier_sql = f" / {multiplier}" if multiplier > 1 else ""
    diff_sql = f"{sql_func}({self.format_args(expression.this, expression.expression)})"

    if months_between or multiplier_sql:
        # MONTHS_BETWEEN returns a float, so we need to truncate the fractional part.
        # For the same reason, we want to truncate if there's a divisor present.
        diff_sql = f"CAST({diff_sql}{multiplier_sql} AS INT)"

    return diff_sql


def _json_format_sql(self: Hive.Generator, expression: exp.JSONFormat) -> str:
    this = expression.this

    if is_parse_json(this):
        if this.this.is_string:
            # Since FROM_JSON requires a nested type, we always wrap the json string with
            # an array to ensure that "naked" strings like "'a'" will be handled correctly
            wrapped_json = exp.Literal.string(f"[{this.this.name}]")

            from_json = self.func(
                "FROM_JSON", wrapped_json, self.func("SCHEMA_OF_JSON", wrapped_json)
            )
            to_json = self.func("TO_JSON", from_json)

            # This strips the [, ] delimiters of the dummy array printed by TO_JSON
            return self.func("REGEXP_EXTRACT", to_json, "'^.(.*).$'", "1")
        return self.sql(this)

    return self.func("TO_JSON", this, expression.args.get("options"))


def _array_sort_sql(self: Hive.Generator, expression: exp.ArraySort) -> str:
    if expression.expression:
        self.unsupported("Hive SORT_ARRAY does not support a comparator")
    return f"SORT_ARRAY({self.sql(expression, 'this')})"


def _property_sql(self: Hive.Generator, expression: exp.Property) -> str:
    return f"{self.property_name(expression, string_key=True)}={self.sql(expression, 'value')}"


def _str_to_unix_sql(self: Hive.Generator, expression: exp.StrToUnix) -> str:
    return self.func("UNIX_TIMESTAMP", expression.this, time_format("hive")(self, expression))


def _str_to_date_sql(self: Hive.Generator, expression: exp.StrToDate) -> str:
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    if time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        this = f"FROM_UNIXTIME(UNIX_TIMESTAMP({this}, {time_format}))"
    return f"CAST({this} AS DATE)"


def _str_to_time_sql(self: Hive.Generator, expression: exp.StrToTime) -> str:
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    if time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        this = f"FROM_UNIXTIME(UNIX_TIMESTAMP({this}, {time_format}))"
    return f"CAST({this} AS TIMESTAMP)"


def _time_to_str(self: Hive.Generator, expression: exp.TimeToStr) -> str:
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    return f"DATE_FORMAT({this}, {time_format})"


def _to_date_sql(self: Hive.Generator, expression: exp.TsOrDsToDate) -> str:
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    if time_format and time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        return f"TO_DATE({this}, {time_format})"
    if isinstance(expression.this, exp.TsOrDsToDate):
        return this
    return f"TO_DATE({this})"


def _parse_ignore_nulls(
    exp_class: t.Type[exp.Expression],
) -> t.Callable[[t.List[exp.Expression]], exp.Expression]:
    def _parse(args: t.List[exp.Expression]) -> exp.Expression:
        this = exp_class(this=seq_get(args, 0))
        if seq_get(args, 1) == exp.true():
            return exp.IgnoreNulls(this=this)
        return this

    return _parse


class Hive(Dialect):
    ALIAS_POST_TABLESAMPLE = True
    IDENTIFIERS_CAN_START_WITH_DIGIT = True
    SUPPORTS_USER_DEFINED_TYPES = False
    SAFE_DIVISION = True

    # https://spark.apache.org/docs/latest/sql-ref-identifier.html#description
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    TIME_MAPPING = {
        "y": "%Y",
        "Y": "%Y",
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "MMM": "%b",
        "MM": "%m",
        "M": "%-m",
        "dd": "%d",
        "d": "%-d",
        "HH": "%H",
        "H": "%-H",
        "hh": "%I",
        "h": "%-I",
        "mm": "%M",
        "m": "%-M",
        "ss": "%S",
        "s": "%-S",
        "SSSSSS": "%f",
        "a": "%p",
        "DD": "%j",
        "D": "%-j",
        "E": "%a",
        "EE": "%a",
        "EEE": "%a",
        "EEEE": "%A",
    }

    DATE_FORMAT = "'yyyy-MM-dd'"
    DATEINT_FORMAT = "'yyyyMMdd'"
    TIME_FORMAT = "'yyyy-MM-dd HH:mm:ss'"

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"']
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ADD ARCHIVE": TokenType.COMMAND,
            "ADD ARCHIVES": TokenType.COMMAND,
            "ADD FILE": TokenType.COMMAND,
            "ADD FILES": TokenType.COMMAND,
            "ADD JAR": TokenType.COMMAND,
            "ADD JARS": TokenType.COMMAND,
            "MSCK REPAIR": TokenType.COMMAND,
            "REFRESH": TokenType.REFRESH,
            "TIMESTAMP AS OF": TokenType.TIMESTAMP_SNAPSHOT,
            "VERSION AS OF": TokenType.VERSION_SNAPSHOT,
            "WITH SERDEPROPERTIES": TokenType.SERDE_PROPERTIES,
        }

        NUMERIC_LITERALS = {
            "L": "BIGINT",
            "S": "SMALLINT",
            "Y": "TINYINT",
            "D": "DOUBLE",
            "F": "FLOAT",
            "BD": "DECIMAL",
        }

    class Parser(parser.Parser):
        LOG_DEFAULTS_TO_LN = True
        STRICT_CAST = False

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "BASE64": exp.ToBase64.from_arg_list,
            "COLLECT_LIST": exp.ArrayAgg.from_arg_list,
            "COLLECT_SET": exp.ArrayUniqueAgg.from_arg_list,
            "DATE_ADD": lambda args: exp.TsOrDsAdd(
                this=seq_get(args, 0), expression=seq_get(args, 1), unit=exp.Literal.string("DAY")
            ),
            "DATE_FORMAT": lambda args: format_time_lambda(exp.TimeToStr, "hive")(
                [
                    exp.TimeStrToTime(this=seq_get(args, 0)),
                    seq_get(args, 1),
                ]
            ),
            "DATE_SUB": lambda args: exp.TsOrDsAdd(
                this=seq_get(args, 0),
                expression=exp.Mul(this=seq_get(args, 1), expression=exp.Literal.number(-1)),
                unit=exp.Literal.string("DAY"),
            ),
            "DATEDIFF": lambda args: exp.DateDiff(
                this=exp.TsOrDsToDate(this=seq_get(args, 0)),
                expression=exp.TsOrDsToDate(this=seq_get(args, 1)),
            ),
            "DAY": lambda args: exp.Day(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
            "FIRST": _parse_ignore_nulls(exp.First),
            "FIRST_VALUE": _parse_ignore_nulls(exp.FirstValue),
            "FROM_UNIXTIME": format_time_lambda(exp.UnixToStr, "hive", True),
            "GET_JSON_OBJECT": exp.JSONExtractScalar.from_arg_list,
            "LAST": _parse_ignore_nulls(exp.Last),
            "LAST_VALUE": _parse_ignore_nulls(exp.LastValue),
            "LOCATE": locate_to_strposition,
            "MAP": parse_var_map,
            "MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate.from_arg_list(args)),
            "PERCENTILE": exp.Quantile.from_arg_list,
            "PERCENTILE_APPROX": exp.ApproxQuantile.from_arg_list,
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2)
            ),
            "SIZE": exp.ArraySize.from_arg_list,
            "SPLIT": exp.RegexpSplit.from_arg_list,
            "STR_TO_MAP": lambda args: exp.StrToMap(
                this=seq_get(args, 0),
                pair_delim=seq_get(args, 1) or exp.Literal.string(","),
                key_value_delim=seq_get(args, 2) or exp.Literal.string(":"),
            ),
            "TO_DATE": format_time_lambda(exp.TsOrDsToDate, "hive"),
            "TO_JSON": exp.JSONFormat.from_arg_list,
            "UNBASE64": exp.FromBase64.from_arg_list,
            "UNIX_TIMESTAMP": format_time_lambda(exp.StrToUnix, "hive", True),
            "YEAR": lambda args: exp.Year(this=exp.TsOrDsToDate.from_arg_list(args)),
        }

        NO_PAREN_FUNCTION_PARSERS = {
            **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
            "TRANSFORM": lambda self: self._parse_transform(),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "WITH SERDEPROPERTIES": lambda self: exp.SerdeProperties(
                expressions=self._parse_wrapped_csv(self._parse_property)
            ),
        }

        def _parse_transform(self) -> t.Optional[exp.Transform | exp.QueryTransform]:
            if not self._match(TokenType.L_PAREN, advance=False):
                self._retreat(self._index - 1)
                return None

            args = self._parse_wrapped_csv(self._parse_lambda)
            row_format_before = self._parse_row_format(match_row=True)

            record_writer = None
            if self._match_text_seq("RECORDWRITER"):
                record_writer = self._parse_string()

            if not self._match(TokenType.USING):
                return exp.Transform.from_arg_list(args)

            command_script = self._parse_string()

            self._match(TokenType.ALIAS)
            schema = self._parse_schema()

            row_format_after = self._parse_row_format(match_row=True)
            record_reader = None
            if self._match_text_seq("RECORDREADER"):
                record_reader = self._parse_string()

            return self.expression(
                exp.QueryTransform,
                expressions=args,
                command_script=command_script,
                schema=schema,
                row_format_before=row_format_before,
                record_writer=record_writer,
                row_format_after=row_format_after,
                record_reader=record_reader,
            )

        def _parse_types(
            self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
        ) -> t.Optional[exp.Expression]:
            """
            Spark (and most likely Hive) treats casts to CHAR(length) and VARCHAR(length) as casts to
            STRING in all contexts except for schema definitions. For example, this is in Spark v3.4.0:

            spark-sql (default)> select cast(1234 as varchar(2));
            23/06/06 15:51:18 WARN CharVarcharUtils: The Spark cast operator does not support
            char/varchar type and simply treats them as string type. Please use string type
            directly to avoid confusion. Otherwise, you can set spark.sql.legacy.charVarcharAsString
            to true, so that Spark treat them as string type as same as Spark 3.0 and earlier

            1234
            Time taken: 4.265 seconds, Fetched 1 row(s)

            This shows that Spark doesn't truncate the value into '12', which is inconsistent with
            what other dialects (e.g. postgres) do, so we need to drop the length to transpile correctly.

            Reference: https://spark.apache.org/docs/latest/sql-ref-datatypes.html
            """
            this = super()._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )

            if this and not schema:
                return this.transform(
                    lambda node: (
                        node.replace(exp.DataType.build("text"))
                        if isinstance(node, exp.DataType) and node.is_type("char", "varchar")
                        else node
                    ),
                    copy=False,
                )

            return this

        def _parse_partition_and_order(
            self,
        ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
            return (
                (
                    self._parse_csv(self._parse_conjunction)
                    if self._match_set({TokenType.PARTITION_BY, TokenType.DISTRIBUTE_BY})
                    else []
                ),
                super()._parse_order(skip_order_token=self._match(TokenType.SORT_BY)),
            )

    class Generator(generator.Generator):
        LIMIT_FETCH = "LIMIT"
        TABLESAMPLE_WITH_METHOD = False
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        INDEX_ON = "ON TABLE"
        EXTRACT_ALLOWS_QUOTES = False
        NVL2_SUPPORTED = False
        LAST_DAY_SUPPORTS_DATE_PART = False
        JSON_PATH_SINGLE_QUOTE_ESCAPE = True

        EXPRESSIONS_WITHOUT_NESTED_CTES = {
            exp.Insert,
            exp.Select,
            exp.Subquery,
            exp.Union,
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
            exp.JSONPathWildcard,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIT: "BOOLEAN",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIME: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.VARBINARY: "BINARY",
        }

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.Group: transforms.preprocess([transforms.unalias_group]),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_qualify,
                    transforms.eliminate_distinct_on,
                    transforms.unnest_to_explode,
                ]
            ),
            exp.Property: _property_sql,
            exp.AnyValue: rename_func("FIRST"),
            exp.ApproxDistinct: approx_count_distinct_sql,
            exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
            exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
            exp.ArrayConcat: rename_func("CONCAT"),
            exp.ArrayJoin: lambda self, e: self.func("CONCAT_WS", e.expression, e.this),
            exp.ArraySize: rename_func("SIZE"),
            exp.ArraySort: _array_sort_sql,
            exp.With: no_recursive_cte_sql,
            exp.DateAdd: _add_date_sql,
            exp.DateDiff: _date_diff_sql,
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateSub: _add_date_sql,
            exp.DateToDi: lambda self,
            e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Hive.DATEINT_FORMAT}) AS INT)",
            exp.DiToDate: lambda self,
            e: f"TO_DATE(CAST({self.sql(e, 'this')} AS STRING), {Hive.DATEINT_FORMAT})",
            exp.FileFormatProperty: lambda self,
            e: f"STORED AS {self.sql(e, 'this') if isinstance(e.this, exp.InputOutputFormat) else e.name.upper()}",
            exp.FromBase64: rename_func("UNBASE64"),
            exp.If: if_sql(),
            exp.ILike: no_ilike_sql,
            exp.IsNan: rename_func("ISNAN"),
            exp.JSONExtract: rename_func("GET_JSON_OBJECT"),
            exp.JSONExtractScalar: rename_func("GET_JSON_OBJECT"),
            exp.JSONFormat: _json_format_sql,
            exp.Left: left_to_substring_sql,
            exp.Map: var_map_sql,
            exp.Max: max_or_greatest,
            exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
            exp.Min: min_or_least,
            exp.MonthsBetween: lambda self, e: self.func("MONTHS_BETWEEN", e.this, e.expression),
            exp.NotNullColumnConstraint: lambda self, e: (
                "" if e.args.get("allow_null") else "NOT NULL"
            ),
            exp.VarMap: var_map_sql,
            exp.Create: _create_sql,
            exp.Quantile: rename_func("PERCENTILE"),
            exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"),
            exp.RegexpExtract: regexp_extract_sql,
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"),
            exp.RegexpSplit: rename_func("SPLIT"),
            exp.Right: right_to_substring_sql,
            exp.SafeDivide: no_safe_divide_sql,
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.ArrayUniqueAgg: rename_func("COLLECT_SET"),
            exp.Split: lambda self,
            e: f"SPLIT({self.sql(e, 'this')}, CONCAT('\\\\Q', {self.sql(e, 'expression')}))",
            exp.StrPosition: strposition_to_locate_sql,
            exp.StrToDate: _str_to_date_sql,
            exp.StrToTime: _str_to_time_sql,
            exp.StrToUnix: _str_to_unix_sql,
            exp.StructExtract: struct_extract_sql,
            exp.TimeStrToDate: rename_func("TO_DATE"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"),
            exp.TimeToStr: _time_to_str,
            exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"),
            exp.ToBase64: rename_func("BASE64"),
            exp.TsOrDiToDi: lambda self,
            e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS STRING), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: _add_date_sql,
            exp.TsOrDsDiff: _date_diff_sql,
            exp.TsOrDsToDate: _to_date_sql,
            exp.TryCast: no_trycast_sql,
            exp.UnixToStr: lambda self, e: self.func(
                "FROM_UNIXTIME", e.this, time_format("hive")(self, e)
            ),
            exp.UnixToTime: rename_func("FROM_UNIXTIME"),
            exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'this')}",
            exp.SerdeProperties: lambda self, e: self.properties(e, prefix="WITH SERDEPROPERTIES"),
            exp.NumberToStr: rename_func("FORMAT_NUMBER"),
            exp.National: lambda self, e: self.national_sql(e, prefix=""),
            exp.ClusteredColumnConstraint: lambda self,
            e: f"({self.expressions(e, 'this', indent=False)})",
            exp.NonClusteredColumnConstraint: lambda self,
            e: f"({self.expressions(e, 'this', indent=False)})",
            exp.NotForReplicationColumnConstraint: lambda self, e: "",
            exp.OnProperty: lambda self, e: "",
            exp.PrimaryKeyColumnConstraint: lambda self, e: "PRIMARY KEY",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.FileFormatProperty: exp.Properties.Location.POST_SCHEMA,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
            exp.WithDataProperty: exp.Properties.Location.UNSUPPORTED,
        }

        def _jsonpathkey_sql(self, expression: exp.JSONPathKey) -> str:
            if isinstance(expression.this, exp.JSONPathWildcard):
                self.unsupported("Unsupported wildcard in JSONPathKey expression")
                return ""

            return super()._jsonpathkey_sql(expression)

        def temporary_storage_provider(self, expression: exp.Create) -> exp.Create:
            # Hive has no temporary storage provider (there are hive settings though)
            return expression

        def parameter_sql(self, expression: exp.Parameter) -> str:
            this = self.sql(expression, "this")
            expression_sql = self.sql(expression, "expression")

            parent = expression.parent
            this = f"{this}:{expression_sql}" if expression_sql else this

            if isinstance(parent, exp.EQ) and isinstance(parent.parent, exp.SetItem):
                # We need to produce SET key = value instead of SET ${key} = value
                return this

            return f"${{{this}}}"

        def schema_sql(self, expression: exp.Schema) -> str:
            for ordered in expression.find_all(exp.Ordered):
                if ordered.args.get("desc") is False:
                    ordered.set("desc", None)

            return super().schema_sql(expression)

        def constraint_sql(self, expression: exp.Constraint) -> str:
            for prop in list(expression.find_all(exp.Properties)):
                prop.pop()

            this = self.sql(expression, "this")
            expressions = self.expressions(expression, sep=" ", flat=True)
            return f"CONSTRAINT {this} {expressions}"

        def rowformatserdeproperty_sql(self, expression: exp.RowFormatSerdeProperty) -> str:
            serde_props = self.sql(expression, "serde_properties")
            serde_props = f" {serde_props}" if serde_props else ""
            return f"ROW FORMAT SERDE {self.sql(expression, 'this')}{serde_props}"

        def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
            return self.func(
                "COLLECT_LIST",
                expression.this.this if isinstance(expression.this, exp.Order) else expression.this,
            )

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, prefix=self.seg("TBLPROPERTIES"))

        def datatype_sql(self, expression: exp.DataType) -> str:
            if (
                expression.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR)
                and not expression.expressions
            ):
                expression = exp.DataType.build("text")
            elif expression.is_type(exp.DataType.Type.TEXT) and expression.expressions:
                expression.set("this", exp.DataType.Type.VARCHAR)
            elif expression.this in exp.DataType.TEMPORAL_TYPES:
                expression = exp.DataType.build(expression.this)
            elif expression.is_type("float"):
                size_expression = expression.find(exp.DataTypeParam)
                if size_expression:
                    size = int(size_expression.name)
                    expression = (
                        exp.DataType.build("float") if size <= 32 else exp.DataType.build("double")
                    )

            return super().datatype_sql(expression)

        def version_sql(self, expression: exp.Version) -> str:
            sql = super().version_sql(expression)
            return sql.replace("FOR ", "", 1)
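A minimal usage sketch (not part of this module): importing sqlglot registers the dialect above under the name "hive", so it can be used through the top-level API. Output can vary between sqlglot versions.

import sqlglot

# Parse Hive SQL into a syntax tree using the dialect defined above.
ast = sqlglot.parse_one("SELECT GET_JSON_OBJECT(payload, '$.id') FROM events", read="hive")

# Generate SQL for another dialect from the same tree.
print(ast.sql(dialect="spark"))

# Or transpile in a single call; transpile() returns a list of statements.
print(sqlglot.transpile("SELECT CAST(x AS STRING) FROM t", read="hive", write="duckdb")[0])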
DATE_DELTA_INTERVAL = {'YEAR': ('ADD_MONTHS', 12), 'MONTH': ('ADD_MONTHS', 1), 'QUARTER': ('ADD_MONTHS', 3), 'WEEK': ('DATE_ADD', 7), 'DAY': ('DATE_ADD', 1)}

TIME_DIFF_FACTOR = {'MILLISECOND': ' * 1000', 'SECOND': '', 'MINUTE': ' / 60', 'HOUR': ' / 3600'}

DIFF_MONTH_SWITCH = ('YEAR', 'QUARTER', 'MONTH')
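These tables drive _add_date_sql and _date_diff_sql: units without a native Hive function are folded into ADD_MONTHS or DATE_ADD with the listed multiplier, month-level diffs go through MONTHS_BETWEEN truncated to an integer, and sub-day diffs are built from UNIX_TIMESTAMP deltas. A hedged sketch (the rendered output is illustrative and may differ slightly between sqlglot versions):

import sqlglot

# A 2-week interval has no direct Hive unit, so it is rewritten onto DATE_ADD days (2 * 7).
print(sqlglot.transpile("SELECT DATE_ADD(x, INTERVAL 2 WEEK)", read="mysql", write="hive")[0])
# e.g. SELECT DATE_ADD(x, 14)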
class Hive(Dialect)
IDENTIFIERS_CAN_START_WITH_DIGIT = True
Determines whether an unquoted identifier can start with a digit.

SUPPORTS_USER_DEFINED_TYPES = False
Determines whether user-defined data types are supported.

SAFE_DIVISION = True
Determines whether division by zero throws an error (False) or returns NULL (True).
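A hedged sketch of what the flag records (the exact attribute layout may differ between sqlglot versions): divisions parsed with this dialect are marked as "safe", since Hive's `/` returns NULL on a zero divisor instead of raising.

import sqlglot
from sqlglot import exp

div = sqlglot.parse_one("SELECT a / b FROM t", read="hive").find(exp.Div)
print(div.args.get("safe"))  # True for Hive-parsed divisions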
NORMALIZATION_STRATEGY = <NormalizationStrategy.CASE_INSENSITIVE: 'CASE_INSENSITIVE'>
Specifies the strategy according to which identifiers should be normalized.
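A hedged sketch of what case-insensitive normalization means in practice, using sqlglot's normalize_identifiers rule (output is illustrative):

from sqlglot import parse_one
from sqlglot.optimizer.normalize_identifiers import normalize_identifiers

# Unquoted identifiers are case-insensitive in Hive/Spark, so normalization lowercases them.
expr = parse_one("SELECT MyCol FROM MyTable", read="hive")
print(normalize_identifiers(expr, dialect="hive").sql(dialect="hive"))
# e.g. SELECT mycol FROM mytable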
TIME_MAPPING: Dict[str, str] = {'y': '%Y', 'Y': '%Y', 'YYYY': '%Y', 'yyyy': '%Y', 'YY': '%y', 'yy': '%y', 'MMMM': '%B', 'MMM': '%b', 'MM': '%m', 'M': '%-m', 'dd': '%d', 'd': '%-d', 'HH': '%H', 'H': '%-H', 'hh': '%I', 'h': '%-I', 'mm': '%M', 'm': '%-M', 'ss': '%S', 's': '%-S', 'SSSSSS': '%f', 'a': '%p', 'DD': '%j', 'D': '%-j', 'E': '%a', 'EE': '%a', 'EEE': '%a', 'EEEE': '%A'}
Associates this dialect's time formats with their equivalent Python strftime format.
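A hedged sketch of how the mapping is used during transpilation (exact output may vary by sqlglot version and target dialect):

import sqlglot

# Hive's Java-style format tokens are rewritten to strftime tokens for dialects that use them.
print(sqlglot.transpile("SELECT DATE_FORMAT(x, 'yyyy-MM-dd HH:mm')", read="hive", write="duckdb")[0])
# e.g. SELECT STRFTIME(CAST(x AS TIMESTAMP), '%Y-%m-%d %H:%M')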
tokenizer_class = <class 'Hive.Tokenizer'>
parser_class = <class 'Hive.Parser'>
generator_class = <class 'Hive.Generator'>
TIME_TRIE and FORMAT_TRIE are prefix tries derived automatically from TIME_MAPPING.

INVERSE_TIME_MAPPING: Dict[str, str] = {'%Y': 'yyyy', '%y': 'yy', '%B': 'MMMM', '%b': 'MMM', '%m': 'MM', '%-m': 'M', '%d': 'dd', '%-d': 'd', '%H': 'HH', '%-H': 'H', '%I': 'hh', '%-I': 'h', '%M': 'mm', '%-M': 'm', '%S': 'ss', '%-S': 's', '%f': 'SSSSSS', '%p': 'a', '%j': 'DD', '%-j': 'D', '%a': 'EEE', '%A': 'EEEE'}

INVERSE_TIME_TRIE is the prefix trie derived automatically from INVERSE_TIME_MAPPING.
class Hive.Tokenizer(sqlglot.tokens.Tokenizer)
SINGLE_TOKENS and KEYWORDS are the base sqlglot.tokens.Tokenizer maps extended with the Hive-specific entries shown in the class source above ('$' as a parameter marker, the ADD ARCHIVE/FILE/JAR commands, MSCK REPAIR, REFRESH, TIMESTAMP AS OF, VERSION AS OF, and WITH SERDEPROPERTIES).
NUMERIC_LITERALS = {'L': 'BIGINT', 'S': 'SMALLINT', 'Y': 'TINYINT', 'D': 'DOUBLE', 'F': 'FLOAT', 'BD': 'DECIMAL'}
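A hedged sketch of the effect of these suffixes (illustrative output):

import sqlglot

# Typed numeric suffixes are recognized by the tokenizer and surface as explicit
# casts in dialects that have no equivalent literal syntax.
print(sqlglot.transpile("SELECT 10L, 5S, 2Y", read="hive", write="duckdb")[0])
# e.g. SELECT CAST(10 AS BIGINT), CAST(5 AS SMALLINT), CAST(2 AS TINYINT)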
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIER_ESCAPES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- COMMENTS
- dialect
- reset
- tokenize
- peek
- tokenize_rs
- size
- sql
- tokens
class Parser(parser.Parser):
    LOG_DEFAULTS_TO_LN = True
    STRICT_CAST = False

    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "BASE64": exp.ToBase64.from_arg_list,
        "COLLECT_LIST": exp.ArrayAgg.from_arg_list,
        "COLLECT_SET": exp.ArrayUniqueAgg.from_arg_list,
        "DATE_ADD": lambda args: exp.TsOrDsAdd(
            this=seq_get(args, 0), expression=seq_get(args, 1), unit=exp.Literal.string("DAY")
        ),
        "DATE_FORMAT": lambda args: format_time_lambda(exp.TimeToStr, "hive")(
            [
                exp.TimeStrToTime(this=seq_get(args, 0)),
                seq_get(args, 1),
            ]
        ),
        "DATE_SUB": lambda args: exp.TsOrDsAdd(
            this=seq_get(args, 0),
            expression=exp.Mul(this=seq_get(args, 1), expression=exp.Literal.number(-1)),
            unit=exp.Literal.string("DAY"),
        ),
        "DATEDIFF": lambda args: exp.DateDiff(
            this=exp.TsOrDsToDate(this=seq_get(args, 0)),
            expression=exp.TsOrDsToDate(this=seq_get(args, 1)),
        ),
        "DAY": lambda args: exp.Day(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
        "FIRST": _parse_ignore_nulls(exp.First),
        "FIRST_VALUE": _parse_ignore_nulls(exp.FirstValue),
        "FROM_UNIXTIME": format_time_lambda(exp.UnixToStr, "hive", True),
        "GET_JSON_OBJECT": exp.JSONExtractScalar.from_arg_list,
        "LAST": _parse_ignore_nulls(exp.Last),
        "LAST_VALUE": _parse_ignore_nulls(exp.LastValue),
        "LOCATE": locate_to_strposition,
        "MAP": parse_var_map,
        "MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate.from_arg_list(args)),
        "PERCENTILE": exp.Quantile.from_arg_list,
        "PERCENTILE_APPROX": exp.ApproxQuantile.from_arg_list,
        "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
            this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2)
        ),
        "SIZE": exp.ArraySize.from_arg_list,
        "SPLIT": exp.RegexpSplit.from_arg_list,
        "STR_TO_MAP": lambda args: exp.StrToMap(
            this=seq_get(args, 0),
            pair_delim=seq_get(args, 1) or exp.Literal.string(","),
            key_value_delim=seq_get(args, 2) or exp.Literal.string(":"),
        ),
        "TO_DATE": format_time_lambda(exp.TsOrDsToDate, "hive"),
        "TO_JSON": exp.JSONFormat.from_arg_list,
        "UNBASE64": exp.FromBase64.from_arg_list,
        "UNIX_TIMESTAMP": format_time_lambda(exp.StrToUnix, "hive", True),
        "YEAR": lambda args: exp.Year(this=exp.TsOrDsToDate.from_arg_list(args)),
    }

    NO_PAREN_FUNCTION_PARSERS = {
        **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
        "TRANSFORM": lambda self: self._parse_transform(),
    }

    PROPERTY_PARSERS = {
        **parser.Parser.PROPERTY_PARSERS,
        "WITH SERDEPROPERTIES": lambda self: exp.SerdeProperties(
            expressions=self._parse_wrapped_csv(self._parse_property)
        ),
    }

    def _parse_transform(self) -> t.Optional[exp.Transform | exp.QueryTransform]:
        if not self._match(TokenType.L_PAREN, advance=False):
            self._retreat(self._index - 1)
            return None

        args = self._parse_wrapped_csv(self._parse_lambda)
        row_format_before = self._parse_row_format(match_row=True)

        record_writer = None
        if self._match_text_seq("RECORDWRITER"):
            record_writer = self._parse_string()

        if not self._match(TokenType.USING):
            return exp.Transform.from_arg_list(args)

        command_script = self._parse_string()

        self._match(TokenType.ALIAS)
        schema = self._parse_schema()

        row_format_after = self._parse_row_format(match_row=True)
        record_reader = None
        if self._match_text_seq("RECORDREADER"):
            record_reader = self._parse_string()

        return self.expression(
            exp.QueryTransform,
            expressions=args,
            command_script=command_script,
            schema=schema,
            row_format_before=row_format_before,
            record_writer=record_writer,
            row_format_after=row_format_after,
            record_reader=record_reader,
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """
        Spark (and most likely Hive) treats casts to CHAR(length) and VARCHAR(length) as casts to
        STRING in all contexts except for schema definitions. For example, this is in Spark v3.4.0:

        spark-sql (default)> select cast(1234 as varchar(2));
        23/06/06 15:51:18 WARN CharVarcharUtils: The Spark cast operator does not support
        char/varchar type and simply treats them as string type. Please use string type
        directly to avoid confusion. Otherwise, you can set spark.sql.legacy.charVarcharAsString
        to true, so that Spark treat them as string type as same as Spark 3.0 and earlier

        1234
        Time taken: 4.265 seconds, Fetched 1 row(s)

        This shows that Spark doesn't truncate the value into '12', which is inconsistent with
        what other dialects (e.g. postgres) do, so we need to drop the length to transpile correctly.

        Reference: https://spark.apache.org/docs/latest/sql-ref-datatypes.html
        """
        this = super()._parse_types(
            check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
        )

        if this and not schema:
            return this.transform(
                lambda node: (
                    node.replace(exp.DataType.build("text"))
                    if isinstance(node, exp.DataType) and node.is_type("char", "varchar")
                    else node
                ),
                copy=False,
            )

        return this

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return (
            (
                self._parse_csv(self._parse_conjunction)
                if self._match_set({TokenType.PARTITION_BY, TokenType.DISTRIBUTE_BY})
                else []
            ),
            super()._parse_order(skip_order_token=self._match(TokenType.SORT_BY)),
        )
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
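The effect of the Hive-specific overrides above is easiest to see through the public API. A minimal sketch, assuming a recent sqlglot release (queries and identifiers are made up for illustration; outputs are indicative rather than guaranteed):

    import sqlglot
    from sqlglot import exp

    # DATE_ADD/DATE_SUB are parsed into day-based TsOrDsAdd nodes.
    ast = sqlglot.parse_one("SELECT DATE_ADD('2023-01-01', 7)", read="hive")
    print(ast.find(exp.TsOrDsAdd) is not None)   # expected: True

    # _parse_types drops CHAR/VARCHAR lengths outside of schemas, so the value is not
    # truncated when transpiling to dialects (e.g. Postgres) that would otherwise truncate it.
    print(sqlglot.transpile("SELECT CAST(1234 AS VARCHAR(2))", read="hive", write="postgres")[0])

    # TRANSFORM is registered as a no-paren function parser and becomes exp.QueryTransform.
    ast = sqlglot.parse_one("SELECT TRANSFORM(a) USING 'cat' AS (b) FROM t", read="hive")
    print(ast.find(exp.QueryTransform) is not None)   # expected: True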
FUNCTIONS =
{'ABS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Abs'>>, 'ANONYMOUS_AGG_FUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.AnonymousAggFunc'>>, 'ANY_VALUE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.AnyValue'>>, 'APPROX_DISTINCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxDistinct'>>, 'APPROX_COUNT_DISTINCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxDistinct'>>, 'APPROX_QUANTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxQuantile'>>, 'APPROX_TOP_K': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxTopK'>>, 'ARG_MAX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMax'>>, 'ARGMAX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMax'>>, 'MAX_BY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMax'>>, 'ARG_MIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMin'>>, 'ARGMIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMin'>>, 'MIN_BY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMin'>>, 'ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Array'>>, 'ARRAY_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAgg'>>, 'ARRAY_ALL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAll'>>, 'ARRAY_ANY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAny'>>, 'ARRAY_CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayConcat'>>, 'ARRAY_CAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayConcat'>>, 'ARRAY_CONTAINS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayContains'>>, 'FILTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayFilter'>>, 'ARRAY_FILTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayFilter'>>, 'ARRAY_JOIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayJoin'>>, 'ARRAY_OVERLAPS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayOverlaps'>>, 'ARRAY_SIZE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySize'>>, 'ARRAY_SORT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySort'>>, 'ARRAY_SUM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySum'>>, 'ARRAY_UNION_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayUnionAgg'>>, 'ARRAY_UNIQUE_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayUniqueAgg'>>, 'AVG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Avg'>>, 'CASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Case'>>, 'CAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Cast'>>, 'CAST_TO_STR_TYPE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CastToStrType'>>, 'CEIL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ceil'>>, 'CEILING': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ceil'>>, 'CHR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Chr'>>, 'CHAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Chr'>>, 'COALESCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'IFNULL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'NVL': <bound 
method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'COLLATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Collate'>>, 'COMBINED_AGG_FUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CombinedAggFunc'>>, 'COMBINED_PARAMETERIZED_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CombinedParameterizedAgg'>>, 'CONCAT': <function Parser.<lambda>>, 'CONCAT_WS': <function Parser.<lambda>>, 'COUNT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Count'>>, 'COUNT_IF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CountIf'>>, 'COUNTIF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CountIf'>>, 'CURRENT_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentDate'>>, 'CURRENT_DATETIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentDatetime'>>, 'CURRENT_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentTime'>>, 'CURRENT_TIMESTAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentTimestamp'>>, 'CURRENT_USER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentUser'>>, 'DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Date'>>, 'DATE_ADD': <function Hive.Parser.<lambda>>, 'DATEDIFF': <function Hive.Parser.<lambda>>, 'DATE_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateDiff'>>, 'DATE_FROM_PARTS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateFromParts'>>, 'DATEFROMPARTS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateFromParts'>>, 'DATE_STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateStrToDate'>>, 'DATE_SUB': <function Hive.Parser.<lambda>>, 'DATE_TO_DATE_STR': <function Parser.<lambda>>, 'DATE_TO_DI': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateToDi'>>, 'DATE_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateTrunc'>>, 'DATETIME_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeAdd'>>, 'DATETIME_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeDiff'>>, 'DATETIME_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeSub'>>, 'DATETIME_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeTrunc'>>, 'DAY': <function Hive.Parser.<lambda>>, 'DAY_OF_MONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfMonth'>>, 'DAYOFMONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfMonth'>>, 'DAY_OF_WEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfWeek'>>, 'DAYOFWEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfWeek'>>, 'DAY_OF_YEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfYear'>>, 'DAYOFYEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfYear'>>, 'DECODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Decode'>>, 'DI_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DiToDate'>>, 'ENCODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Encode'>>, 'EXP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Exp'>>, 'EXPLODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Explode'>>, 'EXPLODE_OUTER': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.ExplodeOuter'>>, 'EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Extract'>>, 'FIRST': <function _parse_ignore_nulls.<locals>._parse>, 'FIRST_VALUE': <function _parse_ignore_nulls.<locals>._parse>, 'FLATTEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Flatten'>>, 'FLOOR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Floor'>>, 'FROM_BASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.FromBase'>>, 'FROM_BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.FromBase64'>>, 'GENERATE_SERIES': <bound method Func.from_arg_list of <class 'sqlglot.expressions.GenerateSeries'>>, 'GREATEST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Greatest'>>, 'GROUP_CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.GroupConcat'>>, 'HEX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Hex'>>, 'HLL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Hll'>>, 'IF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.If'>>, 'INITCAP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Initcap'>>, 'IS_INF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.IsInf'>>, 'ISINF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.IsInf'>>, 'IS_NAN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.IsNan'>>, 'ISNAN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.IsNan'>>, 'J_S_O_N_ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONArray'>>, 'J_S_O_N_ARRAY_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONArrayAgg'>>, 'JSON_ARRAY_CONTAINS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONArrayContains'>>, 'JSONB_EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONBExtract'>>, 'JSONB_EXTRACT_SCALAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONBExtractScalar'>>, 'JSON_EXTRACT': <function parse_extract_json_with_path.<locals>._parser>, 'JSON_EXTRACT_SCALAR': <function parse_extract_json_with_path.<locals>._parser>, 'JSON_FORMAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONFormat'>>, 'J_S_O_N_OBJECT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONObject'>>, 'J_S_O_N_OBJECT_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONObjectAgg'>>, 'J_S_O_N_TABLE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONTable'>>, 'LAG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Lag'>>, 'LAST': <function _parse_ignore_nulls.<locals>._parse>, 'LAST_DAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LastDay'>>, 'LAST_DAY_OF_MONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LastDay'>>, 'LAST_VALUE': <function _parse_ignore_nulls.<locals>._parse>, 'LEAD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Lead'>>, 'LEAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Least'>>, 'LEFT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Left'>>, 'LENGTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Length'>>, 'LEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Length'>>, 'LEVENSHTEIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Levenshtein'>>, 'LN': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.Ln'>>, 'LOG': <function parse_logarithm>, 'LOG10': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Log10'>>, 'LOG2': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Log2'>>, 'LOGICAL_AND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'BOOL_AND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'BOOLAND_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'LOGICAL_OR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'BOOL_OR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'BOOLOR_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'LOWER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Lower'>>, 'LCASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Lower'>>, 'MD5': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MD5'>>, 'MD5_DIGEST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MD5Digest'>>, 'MAP': <function parse_var_map>, 'MAP_FROM_ENTRIES': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MapFromEntries'>>, 'MATCH_AGAINST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MatchAgainst'>>, 'MAX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Max'>>, 'MIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Min'>>, 'MONTH': <function Hive.Parser.<lambda>>, 'MONTHS_BETWEEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MonthsBetween'>>, 'NEXT_VALUE_FOR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.NextValueFor'>>, 'NTH_VALUE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.NthValue'>>, 'NULLIF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Nullif'>>, 'NUMBER_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.NumberToStr'>>, 'NVL2': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Nvl2'>>, 'OPEN_J_S_O_N': <bound method Func.from_arg_list of <class 'sqlglot.expressions.OpenJSON'>>, 'PARAMETERIZED_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ParameterizedAgg'>>, 'PARSE_JSON': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ParseJSON'>>, 'JSON_PARSE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ParseJSON'>>, 'PERCENTILE_CONT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.PercentileCont'>>, 'PERCENTILE_DISC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.PercentileDisc'>>, 'POSEXPLODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Posexplode'>>, 'POSEXPLODE_OUTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.PosexplodeOuter'>>, 'POWER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Pow'>>, 'POW': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Pow'>>, 'PREDICT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Predict'>>, 'QUANTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Quantile'>>, 'RAND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Rand'>>, 'RANDOM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Rand'>>, 'RANDN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Randn'>>, 'RANGE_N': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RangeN'>>, 
'READ_CSV': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ReadCSV'>>, 'REDUCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Reduce'>>, 'REGEXP_EXTRACT': <function Hive.Parser.<lambda>>, 'REGEXP_I_LIKE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpILike'>>, 'REGEXP_LIKE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpLike'>>, 'REGEXP_REPLACE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpReplace'>>, 'REGEXP_SPLIT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpSplit'>>, 'REPEAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Repeat'>>, 'RIGHT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Right'>>, 'ROUND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Round'>>, 'ROW_NUMBER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RowNumber'>>, 'SHA': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SHA'>>, 'SHA1': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SHA'>>, 'SHA2': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SHA2'>>, 'SAFE_DIVIDE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SafeDivide'>>, 'SORT_ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SortArray'>>, 'SPLIT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpSplit'>>, 'SQRT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Sqrt'>>, 'STANDARD_HASH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StandardHash'>>, 'STAR_MAP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StarMap'>>, 'STARTS_WITH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StartsWith'>>, 'STARTSWITH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StartsWith'>>, 'STDDEV': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Stddev'>>, 'STDDEV_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StddevPop'>>, 'STDDEV_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StddevSamp'>>, 'STR_POSITION': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrPosition'>>, 'STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToDate'>>, 'STR_TO_MAP': <function Hive.Parser.<lambda>>, 'STR_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToTime'>>, 'STR_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToUnix'>>, 'STRUCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Struct'>>, 'STRUCT_EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StructExtract'>>, 'STUFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Stuff'>>, 'INSERT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Stuff'>>, 'SUBSTRING': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Substring'>>, 'SUM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Sum'>>, 'TIME_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeAdd'>>, 'TIME_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeDiff'>>, 'TIME_FROM_PARTS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeFromParts'>>, 'TIMEFROMPARTS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeFromParts'>>, 'TIME_STR_TO_DATE': <bound method 
Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToDate'>>, 'TIME_STR_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToTime'>>, 'TIME_STR_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToUnix'>>, 'TIME_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeSub'>>, 'TIME_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeToStr'>>, 'TIME_TO_TIME_STR': <function Parser.<lambda>>, 'TIME_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeToUnix'>>, 'TIME_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeTrunc'>>, 'TIMESTAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Timestamp'>>, 'TIMESTAMP_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampAdd'>>, 'TIMESTAMPDIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampDiff'>>, 'TIMESTAMP_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampDiff'>>, 'TIMESTAMP_FROM_PARTS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampFromParts'>>, 'TIMESTAMPFROMPARTS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampFromParts'>>, 'TIMESTAMP_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampSub'>>, 'TIMESTAMP_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampTrunc'>>, 'TO_ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToArray'>>, 'TO_BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToBase64'>>, 'TO_CHAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToChar'>>, 'TO_DAYS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToDays'>>, 'TRANSFORM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Transform'>>, 'TRIM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Trim'>>, 'TRY_CAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TryCast'>>, 'TS_OR_DI_TO_DI': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDiToDi'>>, 'TS_OR_DS_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsAdd'>>, 'TS_OR_DS_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsDiff'>>, 'TS_OR_DS_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsToDate'>>, 'TS_OR_DS_TO_DATE_STR': <function Parser.<lambda>>, 'TS_OR_DS_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsToTime'>>, 'UNHEX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Unhex'>>, 'UNIX_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixDate'>>, 'UNIX_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToStr'>>, 'UNIX_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToTime'>>, 'UNIX_TO_TIME_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToTimeStr'>>, 'UPPER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Upper'>>, 'UCASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Upper'>>, 'VAR_MAP': <function parse_var_map>, 'VARIANCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VARIANCE_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VAR_SAMP': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.Variance'>>, 'VARIANCE_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.VariancePop'>>, 'VAR_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.VariancePop'>>, 'WEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Week'>>, 'WEEK_OF_YEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.WeekOfYear'>>, 'WEEKOFYEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.WeekOfYear'>>, 'WHEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.When'>>, 'X_M_L_TABLE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.XMLTable'>>, 'XOR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Xor'>>, 'YEAR': <function Hive.Parser.<lambda>>, 'GLOB': <function Parser.<lambda>>, 'JSON_EXTRACT_PATH_TEXT': <function parse_extract_json_with_path.<locals>._parser>, 'LIKE': <function parse_like>, 'BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToBase64'>>, 'COLLECT_LIST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAgg'>>, 'COLLECT_SET': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayUniqueAgg'>>, 'DATE_FORMAT': <function Hive.Parser.<lambda>>, 'FROM_UNIXTIME': <function format_time_lambda.<locals>._format_time>, 'GET_JSON_OBJECT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONExtractScalar'>>, 'LOCATE': <function locate_to_strposition>, 'PERCENTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Quantile'>>, 'PERCENTILE_APPROX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxQuantile'>>, 'SIZE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySize'>>, 'TO_DATE': <function format_time_lambda.<locals>._format_time>, 'TO_JSON': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONFormat'>>, 'UNBASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.FromBase64'>>, 'UNIX_TIMESTAMP': <function format_time_lambda.<locals>._format_time>}
NO_PAREN_FUNCTION_PARSERS =
{'ANY': <function Parser.<lambda>>, 'CASE': <function Parser.<lambda>>, 'IF': <function Parser.<lambda>>, 'NEXT': <function Parser.<lambda>>, 'TRANSFORM': <function Hive.Parser.<lambda>>}
PROPERTY_PARSERS =
{'ALGORITHM': <function Parser.<lambda>>, 'AUTO': <function Parser.<lambda>>, 'AUTO_INCREMENT': <function Parser.<lambda>>, 'BLOCKCOMPRESSION': <function Parser.<lambda>>, 'CHARSET': <function Parser.<lambda>>, 'CHARACTER SET': <function Parser.<lambda>>, 'CHECKSUM': <function Parser.<lambda>>, 'CLUSTER BY': <function Parser.<lambda>>, 'CLUSTERED': <function Parser.<lambda>>, 'COLLATE': <function Parser.<lambda>>, 'COMMENT': <function Parser.<lambda>>, 'CONTAINS': <function Parser.<lambda>>, 'COPY': <function Parser.<lambda>>, 'DATABLOCKSIZE': <function Parser.<lambda>>, 'DEFINER': <function Parser.<lambda>>, 'DETERMINISTIC': <function Parser.<lambda>>, 'DISTKEY': <function Parser.<lambda>>, 'DISTSTYLE': <function Parser.<lambda>>, 'ENGINE': <function Parser.<lambda>>, 'EXECUTE': <function Parser.<lambda>>, 'EXTERNAL': <function Parser.<lambda>>, 'FALLBACK': <function Parser.<lambda>>, 'FORMAT': <function Parser.<lambda>>, 'FREESPACE': <function Parser.<lambda>>, 'HEAP': <function Parser.<lambda>>, 'IMMUTABLE': <function Parser.<lambda>>, 'INHERITS': <function Parser.<lambda>>, 'INPUT': <function Parser.<lambda>>, 'JOURNAL': <function Parser.<lambda>>, 'LANGUAGE': <function Parser.<lambda>>, 'LAYOUT': <function Parser.<lambda>>, 'LIFETIME': <function Parser.<lambda>>, 'LIKE': <function Parser.<lambda>>, 'LOCATION': <function Parser.<lambda>>, 'LOCK': <function Parser.<lambda>>, 'LOCKING': <function Parser.<lambda>>, 'LOG': <function Parser.<lambda>>, 'MATERIALIZED': <function Parser.<lambda>>, 'MERGEBLOCKRATIO': <function Parser.<lambda>>, 'MODIFIES': <function Parser.<lambda>>, 'MULTISET': <function Parser.<lambda>>, 'NO': <function Parser.<lambda>>, 'ON': <function Parser.<lambda>>, 'ORDER BY': <function Parser.<lambda>>, 'OUTPUT': <function Parser.<lambda>>, 'PARTITION': <function Parser.<lambda>>, 'PARTITION BY': <function Parser.<lambda>>, 'PARTITIONED BY': <function Parser.<lambda>>, 'PARTITIONED_BY': <function Parser.<lambda>>, 'PRIMARY KEY': <function Parser.<lambda>>, 'RANGE': <function Parser.<lambda>>, 'READS': <function Parser.<lambda>>, 'REMOTE': <function Parser.<lambda>>, 'RETURNS': <function Parser.<lambda>>, 'ROW': <function Parser.<lambda>>, 'ROW_FORMAT': <function Parser.<lambda>>, 'SAMPLE': <function Parser.<lambda>>, 'SET': <function Parser.<lambda>>, 'SETTINGS': <function Parser.<lambda>>, 'SORTKEY': <function Parser.<lambda>>, 'SOURCE': <function Parser.<lambda>>, 'STABLE': <function Parser.<lambda>>, 'STORED': <function Parser.<lambda>>, 'SYSTEM_VERSIONING': <function Parser.<lambda>>, 'TBLPROPERTIES': <function Parser.<lambda>>, 'TEMP': <function Parser.<lambda>>, 'TEMPORARY': <function Parser.<lambda>>, 'TO': <function Parser.<lambda>>, 'TRANSIENT': <function Parser.<lambda>>, 'TRANSFORM': <function Parser.<lambda>>, 'TTL': <function Parser.<lambda>>, 'USING': <function Parser.<lambda>>, 'VOLATILE': <function Parser.<lambda>>, 'WITH': <function Parser.<lambda>>, 'WITH SERDEPROPERTIES': <function Hive.Parser.<lambda>>}
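Because WITH SERDEPROPERTIES is handled both here (as a property parser) and in the generator (via exp.SerdeProperties), serde-based DDL can round-trip through the dialect. A minimal sketch, assuming a recent sqlglot release; the serde class and property key are illustrative only:

    import sqlglot

    sql = (
        "CREATE TABLE t (x STRING) "
        "ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde' "
        "WITH SERDEPROPERTIES ('separatorChar'=',') "
        "STORED AS TEXTFILE"
    )
    # The DDL should be reproduced with the serde and its properties intact.
    print(sqlglot.transpile(sql, read="hive", write="hive")[0])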
SET_TRIE: Dict =
{'GLOBAL': {0: True}, 'LOCAL': {0: True}, 'SESSION': {0: True}, 'TRANSACTION': {0: True}}
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ID_VAR_TOKENS
- INTERVAL_VARS
- TABLE_ALIAS_TOKENS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- RANGE_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- FUNCTION_PARSERS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- MODIFIABLES
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_UNION
- UNION_MODIFIERS
- NO_PAREN_IF_COMMANDS
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Generator(generator.Generator):
    LIMIT_FETCH = "LIMIT"
    TABLESAMPLE_WITH_METHOD = False
    JOIN_HINTS = False
    TABLE_HINTS = False
    QUERY_HINTS = False
    INDEX_ON = "ON TABLE"
    EXTRACT_ALLOWS_QUOTES = False
    NVL2_SUPPORTED = False
    LAST_DAY_SUPPORTS_DATE_PART = False
    JSON_PATH_SINGLE_QUOTE_ESCAPE = True

    EXPRESSIONS_WITHOUT_NESTED_CTES = {
        exp.Insert,
        exp.Select,
        exp.Subquery,
        exp.Union,
    }

    SUPPORTED_JSON_PATH_PARTS = {
        exp.JSONPathKey,
        exp.JSONPathRoot,
        exp.JSONPathSubscript,
        exp.JSONPathWildcard,
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.BIT: "BOOLEAN",
        exp.DataType.Type.DATETIME: "TIMESTAMP",
        exp.DataType.Type.TEXT: "STRING",
        exp.DataType.Type.TIME: "TIMESTAMP",
        exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
        exp.DataType.Type.VARBINARY: "BINARY",
    }

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.Group: transforms.preprocess([transforms.unalias_group]),
        exp.Select: transforms.preprocess(
            [
                transforms.eliminate_qualify,
                transforms.eliminate_distinct_on,
                transforms.unnest_to_explode,
            ]
        ),
        exp.Property: _property_sql,
        exp.AnyValue: rename_func("FIRST"),
        exp.ApproxDistinct: approx_count_distinct_sql,
        exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
        exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
        exp.ArrayConcat: rename_func("CONCAT"),
        exp.ArrayJoin: lambda self, e: self.func("CONCAT_WS", e.expression, e.this),
        exp.ArraySize: rename_func("SIZE"),
        exp.ArraySort: _array_sort_sql,
        exp.With: no_recursive_cte_sql,
        exp.DateAdd: _add_date_sql,
        exp.DateDiff: _date_diff_sql,
        exp.DateStrToDate: datestrtodate_sql,
        exp.DateSub: _add_date_sql,
        exp.DateToDi: lambda self, e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Hive.DATEINT_FORMAT}) AS INT)",
        exp.DiToDate: lambda self, e: f"TO_DATE(CAST({self.sql(e, 'this')} AS STRING), {Hive.DATEINT_FORMAT})",
        exp.FileFormatProperty: lambda self, e: f"STORED AS {self.sql(e, 'this') if isinstance(e.this, exp.InputOutputFormat) else e.name.upper()}",
        exp.FromBase64: rename_func("UNBASE64"),
        exp.If: if_sql(),
        exp.ILike: no_ilike_sql,
        exp.IsNan: rename_func("ISNAN"),
        exp.JSONExtract: rename_func("GET_JSON_OBJECT"),
        exp.JSONExtractScalar: rename_func("GET_JSON_OBJECT"),
        exp.JSONFormat: _json_format_sql,
        exp.Left: left_to_substring_sql,
        exp.Map: var_map_sql,
        exp.Max: max_or_greatest,
        exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
        exp.Min: min_or_least,
        exp.MonthsBetween: lambda self, e: self.func("MONTHS_BETWEEN", e.this, e.expression),
        exp.NotNullColumnConstraint: lambda self, e: (
            "" if e.args.get("allow_null") else "NOT NULL"
        ),
        exp.VarMap: var_map_sql,
        exp.Create: _create_sql,
        exp.Quantile: rename_func("PERCENTILE"),
        exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"),
        exp.RegexpExtract: regexp_extract_sql,
        exp.RegexpReplace: regexp_replace_sql,
        exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"),
        exp.RegexpSplit: rename_func("SPLIT"),
        exp.Right: right_to_substring_sql,
        exp.SafeDivide: no_safe_divide_sql,
        exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
        exp.ArrayUniqueAgg: rename_func("COLLECT_SET"),
        exp.Split: lambda self, e: f"SPLIT({self.sql(e, 'this')}, CONCAT('\\\\Q', {self.sql(e, 'expression')}))",
        exp.StrPosition: strposition_to_locate_sql,
        exp.StrToDate: _str_to_date_sql,
        exp.StrToTime: _str_to_time_sql,
        exp.StrToUnix: _str_to_unix_sql,
        exp.StructExtract: struct_extract_sql,
        exp.TimeStrToDate: rename_func("TO_DATE"),
        exp.TimeStrToTime: timestrtotime_sql,
        exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"),
        exp.TimeToStr: _time_to_str,
        exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"),
        exp.ToBase64: rename_func("BASE64"),
        exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS STRING), '-', ''), 1, 8) AS INT)",
        exp.TsOrDsAdd: _add_date_sql,
        exp.TsOrDsDiff: _date_diff_sql,
        exp.TsOrDsToDate: _to_date_sql,
        exp.TryCast: no_trycast_sql,
        exp.UnixToStr: lambda self, e: self.func(
            "FROM_UNIXTIME", e.this, time_format("hive")(self, e)
        ),
        exp.UnixToTime: rename_func("FROM_UNIXTIME"),
        exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"),
        exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'this')}",
        exp.SerdeProperties: lambda self, e: self.properties(e, prefix="WITH SERDEPROPERTIES"),
        exp.NumberToStr: rename_func("FORMAT_NUMBER"),
        exp.National: lambda self, e: self.national_sql(e, prefix=""),
        exp.ClusteredColumnConstraint: lambda self, e: f"({self.expressions(e, 'this', indent=False)})",
        exp.NonClusteredColumnConstraint: lambda self, e: f"({self.expressions(e, 'this', indent=False)})",
        exp.NotForReplicationColumnConstraint: lambda self, e: "",
        exp.OnProperty: lambda self, e: "",
        exp.PrimaryKeyColumnConstraint: lambda self, e: "PRIMARY KEY",
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.FileFormatProperty: exp.Properties.Location.POST_SCHEMA,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        exp.WithDataProperty: exp.Properties.Location.UNSUPPORTED,
    }

    def _jsonpathkey_sql(self, expression: exp.JSONPathKey) -> str:
        if isinstance(expression.this, exp.JSONPathWildcard):
            self.unsupported("Unsupported wildcard in JSONPathKey expression")
            return ""

        return super()._jsonpathkey_sql(expression)

    def temporary_storage_provider(self, expression: exp.Create) -> exp.Create:
        # Hive has no temporary storage provider (there are hive settings though)
        return expression

    def parameter_sql(self, expression: exp.Parameter) -> str:
        this = self.sql(expression, "this")
        expression_sql = self.sql(expression, "expression")

        parent = expression.parent
        this = f"{this}:{expression_sql}" if expression_sql else this

        if isinstance(parent, exp.EQ) and isinstance(parent.parent, exp.SetItem):
            # We need to produce SET key = value instead of SET ${key} = value
            return this

        return f"${{{this}}}"

    def schema_sql(self, expression: exp.Schema) -> str:
        for ordered in expression.find_all(exp.Ordered):
            if ordered.args.get("desc") is False:
                ordered.set("desc", None)

        return super().schema_sql(expression)

    def constraint_sql(self, expression: exp.Constraint) -> str:
        for prop in list(expression.find_all(exp.Properties)):
            prop.pop()

        this = self.sql(expression, "this")
        expressions = self.expressions(expression, sep=" ", flat=True)
        return f"CONSTRAINT {this} {expressions}"

    def rowformatserdeproperty_sql(self, expression: exp.RowFormatSerdeProperty) -> str:
        serde_props = self.sql(expression, "serde_properties")
        serde_props = f" {serde_props}" if serde_props else ""
        return f"ROW FORMAT SERDE {self.sql(expression, 'this')}{serde_props}"

    def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
        return self.func(
            "COLLECT_LIST",
            expression.this.this if isinstance(expression.this, exp.Order) else expression.this,
        )

    def with_properties(self, properties: exp.Properties) -> str:
        return self.properties(properties, prefix=self.seg("TBLPROPERTIES"))

    def datatype_sql(self, expression: exp.DataType) -> str:
        if (
            expression.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR)
            and not expression.expressions
        ):
            expression = exp.DataType.build("text")
        elif expression.is_type(exp.DataType.Type.TEXT) and expression.expressions:
            expression.set("this", exp.DataType.Type.VARCHAR)
        elif expression.this in exp.DataType.TEMPORAL_TYPES:
            expression = exp.DataType.build(expression.this)
        elif expression.is_type("float"):
            size_expression = expression.find(exp.DataTypeParam)
            if size_expression:
                size = int(size_expression.name)
                expression = (
                    exp.DataType.build("float") if size <= 32 else exp.DataType.build("double")
                )

        return super().datatype_sql(expression)

    def version_sql(self, expression: exp.Version) -> str:
        sql = super().version_sql(expression)
        return sql.replace("FOR ", "", 1)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether or not to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether or not to normalize identifiers to lowercase. Default: False.
- pad: Determines the pad size in a formatted string. Default: 2.
- indent: Determines the indentation size in a formatted string. Default: 2.
- normalize_functions: Whether or not to normalize all function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Determines whether or not the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether or not to preserve comments in the output SQL code. Default: True
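A few of the TRANSFORMS above, exercised by generating Hive SQL from another dialect. This is a minimal sketch assuming a recent sqlglot release; the queries are made up and the exact output text may differ slightly between versions:

    import sqlglot

    # TRY_CAST is not supported in Hive, so no_trycast_sql falls back to a plain CAST.
    print(sqlglot.transpile("SELECT TRY_CAST(x AS INT)", read="duckdb", write="hive")[0])

    # ARRAY_AGG is rendered with Hive's COLLECT_LIST (see arrayagg_sql above).
    print(sqlglot.transpile("SELECT ARRAY_AGG(x) FROM t", read="duckdb", write="hive")[0])

    # ILIKE has no Hive equivalent; no_ilike_sql rewrites it in terms of LIKE.
    print(sqlglot.transpile("SELECT * FROM t WHERE name ILIKE '%a%'", read="duckdb", write="hive")[0])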
EXPRESSIONS_WITHOUT_NESTED_CTES =
{<class 'sqlglot.expressions.Select'>, <class 'sqlglot.expressions.Union'>, <class 'sqlglot.expressions.Insert'>, <class 'sqlglot.expressions.Subquery'>}
SUPPORTED_JSON_PATH_PARTS =
{<class 'sqlglot.expressions.JSONPathSubscript'>, <class 'sqlglot.expressions.JSONPathRoot'>, <class 'sqlglot.expressions.JSONPathKey'>, <class 'sqlglot.expressions.JSONPathWildcard'>}
TYPE_MAPPING =
{<Type.NCHAR: 'NCHAR'>: 'CHAR', <Type.NVARCHAR: 'NVARCHAR'>: 'VARCHAR', <Type.MEDIUMTEXT: 'MEDIUMTEXT'>: 'TEXT', <Type.LONGTEXT: 'LONGTEXT'>: 'TEXT', <Type.TINYTEXT: 'TINYTEXT'>: 'TEXT', <Type.MEDIUMBLOB: 'MEDIUMBLOB'>: 'BLOB', <Type.LONGBLOB: 'LONGBLOB'>: 'BLOB', <Type.TINYBLOB: 'TINYBLOB'>: 'BLOB', <Type.INET: 'INET'>: 'INET', <Type.BIT: 'BIT'>: 'BOOLEAN', <Type.DATETIME: 'DATETIME'>: 'TIMESTAMP', <Type.TEXT: 'TEXT'>: 'STRING', <Type.TIME: 'TIME'>: 'TIMESTAMP', <Type.TIMESTAMPTZ: 'TIMESTAMPTZ'>: 'TIMESTAMP', <Type.VARBINARY: 'VARBINARY'>: 'BINARY'}
TRANSFORMS =
{<class 'sqlglot.expressions.JSONPathKey'>: <function <lambda>>, <class 'sqlglot.expressions.JSONPathRoot'>: <function <lambda>>, <class 'sqlglot.expressions.JSONPathSubscript'>: <function <lambda>>, <class 'sqlglot.expressions.JSONPathWildcard'>: <function <lambda>>, <class 'sqlglot.expressions.AutoRefreshProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CaseSpecificColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CharacterSetColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CharacterSetProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CheckColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ClusteredColumnConstraint'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.CollateColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CommentColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CopyGrantsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.DateAdd'>: <function _add_date_sql>, <class 'sqlglot.expressions.DateFormatColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.DefaultColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.EncodeColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ExecuteAsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ExternalProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.HeapProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.InheritsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.InlineLengthColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.InputModelProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.IntervalSpan'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.LanguageProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.LocationProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.LogProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.MaterializedProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.NonClusteredColumnConstraint'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.NoPrimaryIndexProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.NotForReplicationColumnConstraint'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.OnCommitProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.OnProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.OnUpdateColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.OutputModelProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.PathColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.RemoteWithConnectionModelProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ReturnsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SampleProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SetConfigProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SetProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SettingsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SqlReadWriteProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SqlSecurityProperty'>: 
<function Generator.<lambda>>, <class 'sqlglot.expressions.StabilityProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TemporaryProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TitleColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ToTableProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TransformModelProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TransientProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.UppercaseColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.VarMap'>: <function var_map_sql>, <class 'sqlglot.expressions.VolatileProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.WithJournalTableProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.Group'>: <function preprocess.<locals>._to_sql>, <class 'sqlglot.expressions.Select'>: <function preprocess.<locals>._to_sql>, <class 'sqlglot.expressions.Property'>: <function _property_sql>, <class 'sqlglot.expressions.AnyValue'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ApproxDistinct'>: <function approx_count_distinct_sql>, <class 'sqlglot.expressions.ArgMax'>: <function arg_max_or_min_no_count.<locals>._arg_max_or_min_sql>, <class 'sqlglot.expressions.ArgMin'>: <function arg_max_or_min_no_count.<locals>._arg_max_or_min_sql>, <class 'sqlglot.expressions.ArrayConcat'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ArrayJoin'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.ArraySize'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ArraySort'>: <function _array_sort_sql>, <class 'sqlglot.expressions.With'>: <function no_recursive_cte_sql>, <class 'sqlglot.expressions.DateDiff'>: <function _date_diff_sql>, <class 'sqlglot.expressions.DateStrToDate'>: <function datestrtodate_sql>, <class 'sqlglot.expressions.DateSub'>: <function _add_date_sql>, <class 'sqlglot.expressions.DateToDi'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.DiToDate'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.FileFormatProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.FromBase64'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.If'>: <function if_sql.<locals>._if_sql>, <class 'sqlglot.expressions.ILike'>: <function no_ilike_sql>, <class 'sqlglot.expressions.IsNan'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.JSONExtract'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.JSONExtractScalar'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.JSONFormat'>: <function _json_format_sql>, <class 'sqlglot.expressions.Left'>: <function left_to_substring_sql>, <class 'sqlglot.expressions.Map'>: <function var_map_sql>, <class 'sqlglot.expressions.Max'>: <function max_or_greatest>, <class 'sqlglot.expressions.MD5Digest'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.Min'>: <function min_or_least>, <class 'sqlglot.expressions.MonthsBetween'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.NotNullColumnConstraint'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.Create'>: <function _create_sql>, <class 'sqlglot.expressions.Quantile'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ApproxQuantile'>: <function rename_func.<locals>.<lambda>>, <class 
'sqlglot.expressions.RegexpExtract'>: <function regexp_extract_sql>, <class 'sqlglot.expressions.RegexpReplace'>: <function regexp_replace_sql>, <class 'sqlglot.expressions.RegexpLike'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.RegexpSplit'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.Right'>: <function right_to_substring_sql>, <class 'sqlglot.expressions.SafeDivide'>: <function no_safe_divide_sql>, <class 'sqlglot.expressions.SchemaCommentProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.ArrayUniqueAgg'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.Split'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.StrPosition'>: <function strposition_to_locate_sql>, <class 'sqlglot.expressions.StrToDate'>: <function _str_to_date_sql>, <class 'sqlglot.expressions.StrToTime'>: <function _str_to_time_sql>, <class 'sqlglot.expressions.StrToUnix'>: <function _str_to_unix_sql>, <class 'sqlglot.expressions.StructExtract'>: <function struct_extract_sql>, <class 'sqlglot.expressions.TimeStrToDate'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TimeStrToTime'>: <function timestrtotime_sql>, <class 'sqlglot.expressions.TimeStrToUnix'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TimeToStr'>: <function _time_to_str>, <class 'sqlglot.expressions.TimeToUnix'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ToBase64'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TsOrDiToDi'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.TsOrDsAdd'>: <function _add_date_sql>, <class 'sqlglot.expressions.TsOrDsDiff'>: <function _date_diff_sql>, <class 'sqlglot.expressions.TsOrDsToDate'>: <function _to_date_sql>, <class 'sqlglot.expressions.TryCast'>: <function no_trycast_sql>, <class 'sqlglot.expressions.UnixToStr'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.UnixToTime'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.UnixToTimeStr'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.PartitionedByProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.SerdeProperties'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.NumberToStr'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.National'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.PrimaryKeyColumnConstraint'>: <function Hive.Generator.<lambda>>}
PROPERTIES_LOCATION =
Mapping from sqlglot.expressions property classes to the exp.Properties.Location at which they are rendered, grouped here by location:

- POST_CREATE: AlgorithmProperty, DefinerProperty, ExternalProperty, MaterializedProperty, SetProperty, SqlSecurityProperty, TemporaryProperty, TransientProperty
- POST_NAME: BlockCompressionProperty, ChecksumProperty, DataBlocksizeProperty, FallbackProperty, FreespaceProperty, IsolatedLoadingProperty, JournalProperty, LogProperty, MergeBlockRatioProperty, WithJournalTableProperty
- POST_SCHEMA: AutoIncrementProperty, AutoRefreshProperty, CharacterSetProperty, Cluster, ClusteredByProperty, CollateProperty, CopyGrantsProperty, DictProperty, DictRange, DistKeyProperty, DistStyleProperty, EngineProperty, ExecuteAsProperty, FileFormatProperty, InheritsProperty, InputModelProperty, LanguageProperty, LikeProperty, LocationProperty, MergeTreeTTL, OnProperty, Order, OutputModelProperty, PartitionedByProperty, PartitionedOfProperty, PrimaryKey, RemoteWithConnectionModelProperty, ReturnsProperty, RowFormatDelimitedProperty, RowFormatProperty, RowFormatSerdeProperty, SampleProperty, SchemaCommentProperty, SerdeProperties, Set, SetConfigProperty, SettingsProperty, SortKeyProperty, SqlReadWriteProperty, StabilityProperty, ToTableProperty, TransformModelProperty, WithSystemVersioningProperty
- POST_WITH: HeapProperty, Property
- POST_ALIAS: LockingProperty
- POST_EXPRESSION: NoPrimaryIndexProperty, OnCommitProperty
- UNSUPPORTED: VolatileProperty, WithDataProperty
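For orientation, a hedged sketch of how these locations surface in generated SQL: PARTITIONED BY and STORED AS correspond to POST_SCHEMA properties, so Hive emits them after the column list (exact output may vary across sqlglot versions):

import sqlglot

sql = "CREATE TABLE t (x INT) PARTITIONED BY (ds STRING) STORED AS PARQUET"
# Both properties sit at Location.POST_SCHEMA, so they render after the schema.
print(sqlglot.transpile(sql, read="hive", write="hive")[0])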
def temporary_storage_provider(self, expression: sqlglot.expressions.Create) -> sqlglot.expressions.Create:
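This is exposed as a hook on the Hive generator. A minimal sketch of how a hypothetical subclass might override it to pin a storage format on temporary tables (the class name MyHive and the PARQUET choice are assumptions for illustration, not sqlglot's own code):

from sqlglot import exp
from sqlglot.dialects.hive import Hive

class MyHive(Hive):  # hypothetical dialect, for illustration only
    class Generator(Hive.Generator):
        def temporary_storage_provider(self, expression: exp.Create) -> exp.Create:
            # Attach an explicit storage format to CREATE TEMPORARY TABLE.
            props = expression.args.get("properties") or exp.Properties(expressions=[])
            props.append("expressions", exp.FileFormatProperty(this=exp.var("PARQUET")))
            expression.set("properties", props)
            return expression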
def parameter_sql(self, expression: exp.Parameter) -> str:
    this = self.sql(expression, "this")
    expression_sql = self.sql(expression, "expression")

    parent = expression.parent
    this = f"{this}:{expression_sql}" if expression_sql else this

    if isinstance(parent, exp.EQ) and isinstance(parent.parent, exp.SetItem):
        # We need to produce SET key = value instead of SET ${key} = value
        return this

    return f"${{{this}}}"
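A hedged usage sketch of the rendering path above (the variable namespace and name are made up for the example): building an exp.Parameter directly shows the ${this:expression} output.

from sqlglot import exp
from sqlglot.dialects.hive import Hive

# Hypothetical hiveconf variable, chosen only for illustration.
param = exp.Parameter(this=exp.var("hiveconf"), expression=exp.var("x"))
print(Hive().generate(param))  # expected: ${hiveconf:x}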
def rowformatserdeproperty_sql(self, expression: sqlglot.expressions.RowFormatSerdeProperty) -> str:
def datatype_sql(self, expression: exp.DataType) -> str:
    if (
        expression.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR)
        and not expression.expressions
    ):
        expression = exp.DataType.build("text")
    elif expression.is_type(exp.DataType.Type.TEXT) and expression.expressions:
        expression.set("this", exp.DataType.Type.VARCHAR)
    elif expression.this in exp.DataType.TEMPORAL_TYPES:
        expression = exp.DataType.build(expression.this)
    elif expression.is_type("float"):
        size_expression = expression.find(exp.DataTypeParam)
        if size_expression:
            size = int(size_expression.name)
            expression = (
                exp.DataType.build("float") if size <= 32 else exp.DataType.build("double")
            )

    return super().datatype_sql(expression)
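A hedged illustration of the type rewrites above (outputs shown in comments may differ slightly between sqlglot versions): an unsized VARCHAR widens to Hive's STRING, while a sized VARCHAR is preserved.

import sqlglot

print(sqlglot.transpile("CAST(a AS VARCHAR)", write="hive")[0])      # CAST(a AS STRING)
print(sqlglot.transpile("CAST(a AS VARCHAR(10))", write="hive")[0])  # CAST(a AS VARCHAR(10))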
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- EXPLICIT_UNION
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_ONLY_LITERALS
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- COLUMN_JOIN_MARKS_SUPPORTED
- TZ_TO_WITH_TIME_ZONE
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_SEED_KEYWORD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- STAR_MAPPING
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- KEY_VALUE_DEFINITIONS
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- except_op
- fetch_sql
- filter_sql
- hint_sql
- index_sql
- identifier_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- intersect_op
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognize_sql
- query_modifiers
- offset_limit_modifiers
- after_having_modifiers
- after_limit_modifiers
- select_sql
- schema_columns_sql
- star_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- union_sql
- union_op
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- currenttimestamp_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- renametable_sql
- renamecolumn_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- or_sql
- slice_sql
- sub_sql
- trycast_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- set_operation
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql