sqlglot.dialects.hive
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    approx_count_distinct_sql,
    create_with_partitions_sql,
    format_time_lambda,
    if_sql,
    is_parse_json,
    left_to_substring_sql,
    locate_to_strposition,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    no_recursive_cte_sql,
    no_safe_divide_sql,
    no_trycast_sql,
    regexp_extract_sql,
    regexp_replace_sql,
    rename_func,
    right_to_substring_sql,
    strposition_to_locate_sql,
    struct_extract_sql,
    time_format,
    timestrtotime_sql,
    var_map_sql,
)
from sqlglot.helper import seq_get
from sqlglot.parser import parse_var_map
from sqlglot.tokens import TokenType

# (FuncType, Multiplier)
DATE_DELTA_INTERVAL = {
    "YEAR": ("ADD_MONTHS", 12),
    "MONTH": ("ADD_MONTHS", 1),
    "QUARTER": ("ADD_MONTHS", 3),
    "WEEK": ("DATE_ADD", 7),
    "DAY": ("DATE_ADD", 1),
}

TIME_DIFF_FACTOR = {
    "MILLISECOND": " * 1000",
    "SECOND": "",
    "MINUTE": " / 60",
    "HOUR": " / 3600",
}

DIFF_MONTH_SWITCH = ("YEAR", "QUARTER", "MONTH")


def _add_date_sql(self: Hive.Generator, expression: exp.DateAdd | exp.DateSub) -> str:
    unit = expression.text("unit").upper()
    func, multiplier = DATE_DELTA_INTERVAL.get(unit, ("DATE_ADD", 1))

    if isinstance(expression, exp.DateSub):
        multiplier *= -1

    if expression.expression.is_number:
        modified_increment = exp.Literal.number(int(expression.text("expression")) * multiplier)
    else:
        modified_increment = expression.expression.copy()
        if multiplier != 1:
            modified_increment = exp.Mul(  # type: ignore
                this=modified_increment, expression=exp.Literal.number(multiplier)
            )

    return self.func(func, expression.this, modified_increment)


def _date_diff_sql(self: Hive.Generator, expression: exp.DateDiff) -> str:
    unit = expression.text("unit").upper()

    factor = TIME_DIFF_FACTOR.get(unit)
    if factor is not None:
        left = self.sql(expression, "this")
        right = self.sql(expression, "expression")
        sec_diff = f"UNIX_TIMESTAMP({left}) - UNIX_TIMESTAMP({right})"
        return f"({sec_diff}){factor}" if factor else sec_diff

    sql_func = "MONTHS_BETWEEN" if unit in DIFF_MONTH_SWITCH else "DATEDIFF"
    _, multiplier = DATE_DELTA_INTERVAL.get(unit, ("", 1))
    multiplier_sql = f" / {multiplier}" if multiplier > 1 else ""
    diff_sql = f"{sql_func}({self.format_args(expression.this, expression.expression)})"

    return f"{diff_sql}{multiplier_sql}"


def _json_format_sql(self: Hive.Generator, expression: exp.JSONFormat) -> str:
    this = expression.this
    if is_parse_json(this) and this.this.is_string:
        # Since FROM_JSON requires a nested type, we always wrap the json string with
        # an array to ensure that "naked" strings like "'a'" will be handled correctly
        wrapped_json = exp.Literal.string(f"[{this.this.name}]")

        from_json = self.func("FROM_JSON", wrapped_json, self.func("SCHEMA_OF_JSON", wrapped_json))
        to_json = self.func("TO_JSON", from_json)

        # This strips the [, ] delimiters of the dummy array printed by TO_JSON
        return self.func("REGEXP_EXTRACT", to_json, "'^.(.*).$'", "1")

    return self.func("TO_JSON", this, expression.args.get("options"))


def _array_sort_sql(self: Hive.Generator, expression: exp.ArraySort) -> str:
    if expression.expression:
        self.unsupported("Hive SORT_ARRAY does not support a comparator")
    return f"SORT_ARRAY({self.sql(expression, 'this')})"


def _property_sql(self: Hive.Generator, expression: exp.Property) -> str:
    return f"'{expression.name}'={self.sql(expression, 'value')}"


def _str_to_unix_sql(self: Hive.Generator, expression: exp.StrToUnix) -> str:
    return self.func("UNIX_TIMESTAMP", expression.this, time_format("hive")(self, expression))


def _str_to_date_sql(self: Hive.Generator, expression: exp.StrToDate) -> str:
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    if time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        this = f"FROM_UNIXTIME(UNIX_TIMESTAMP({this}, {time_format}))"
    return f"CAST({this} AS DATE)"


def _str_to_time_sql(self: Hive.Generator, expression: exp.StrToTime) -> str:
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    if time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        this = f"FROM_UNIXTIME(UNIX_TIMESTAMP({this}, {time_format}))"
    return f"CAST({this} AS TIMESTAMP)"


def _time_to_str(self: Hive.Generator, expression: exp.TimeToStr) -> str:
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    return f"DATE_FORMAT({this}, {time_format})"


def _to_date_sql(self: Hive.Generator, expression: exp.TsOrDsToDate) -> str:
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    if time_format and time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        return f"TO_DATE({this}, {time_format})"
    return f"TO_DATE({this})"


class Hive(Dialect):
    ALIAS_POST_TABLESAMPLE = True
    IDENTIFIERS_CAN_START_WITH_DIGIT = True
    SUPPORTS_USER_DEFINED_TYPES = False

    # https://spark.apache.org/docs/latest/sql-ref-identifier.html#description
    RESOLVES_IDENTIFIERS_AS_UPPERCASE = None

    TIME_MAPPING = {
        "y": "%Y",
        "Y": "%Y",
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "MMM": "%b",
        "MM": "%m",
        "M": "%-m",
        "dd": "%d",
        "d": "%-d",
        "HH": "%H",
        "H": "%-H",
        "hh": "%I",
        "h": "%-I",
        "mm": "%M",
        "m": "%-M",
        "ss": "%S",
        "s": "%-S",
        "SSSSSS": "%f",
        "a": "%p",
        "DD": "%j",
        "D": "%-j",
        "E": "%a",
        "EE": "%a",
        "EEE": "%a",
        "EEEE": "%A",
    }

    DATE_FORMAT = "'yyyy-MM-dd'"
    DATEINT_FORMAT = "'yyyyMMdd'"
    TIME_FORMAT = "'yyyy-MM-dd HH:mm:ss'"

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"']
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]
        ENCODE = "utf-8"

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ADD ARCHIVE": TokenType.COMMAND,
            "ADD ARCHIVES": TokenType.COMMAND,
            "ADD FILE": TokenType.COMMAND,
            "ADD FILES": TokenType.COMMAND,
            "ADD JAR": TokenType.COMMAND,
            "ADD JARS": TokenType.COMMAND,
            "MSCK REPAIR": TokenType.COMMAND,
            "REFRESH": TokenType.COMMAND,
            "WITH SERDEPROPERTIES": TokenType.SERDE_PROPERTIES,
            "TIMESTAMP AS OF": TokenType.TIMESTAMP_SNAPSHOT,
            "VERSION AS OF": TokenType.VERSION_SNAPSHOT,
        }

        NUMERIC_LITERALS = {
            "L": "BIGINT",
            "S": "SMALLINT",
            "Y": "TINYINT",
            "D": "DOUBLE",
            "F": "FLOAT",
            "BD": "DECIMAL",
        }

    class Parser(parser.Parser):
        LOG_DEFAULTS_TO_LN = True
        STRICT_CAST = False

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "BASE64": exp.ToBase64.from_arg_list,
            "COLLECT_LIST": exp.ArrayAgg.from_arg_list,
            "COLLECT_SET": exp.SetAgg.from_arg_list,
            "DATE_ADD": lambda args: exp.TsOrDsAdd(
                this=seq_get(args, 0), expression=seq_get(args, 1), unit=exp.Literal.string("DAY")
            ),
            "DATE_FORMAT": lambda args: format_time_lambda(exp.TimeToStr, "hive")(
                [
                    exp.TimeStrToTime(this=seq_get(args, 0)),
                    seq_get(args, 1),
                ]
            ),
            "DATE_SUB": lambda args: exp.TsOrDsAdd(
                this=seq_get(args, 0),
                expression=exp.Mul(this=seq_get(args, 1), expression=exp.Literal.number(-1)),
                unit=exp.Literal.string("DAY"),
            ),
            "DATEDIFF": lambda args: exp.DateDiff(
                this=exp.TsOrDsToDate(this=seq_get(args, 0)),
                expression=exp.TsOrDsToDate(this=seq_get(args, 1)),
            ),
            "DAY": lambda args: exp.Day(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
            "FROM_UNIXTIME": format_time_lambda(exp.UnixToStr, "hive", True),
            "GET_JSON_OBJECT": exp.JSONExtractScalar.from_arg_list,
            "LOCATE": locate_to_strposition,
            "MAP": parse_var_map,
            "MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate.from_arg_list(args)),
            "PERCENTILE": exp.Quantile.from_arg_list,
            "PERCENTILE_APPROX": exp.ApproxQuantile.from_arg_list,
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2)
            ),
            "SIZE": exp.ArraySize.from_arg_list,
            "SPLIT": exp.RegexpSplit.from_arg_list,
            "STR_TO_MAP": lambda args: exp.StrToMap(
                this=seq_get(args, 0),
                pair_delim=seq_get(args, 1) or exp.Literal.string(","),
                key_value_delim=seq_get(args, 2) or exp.Literal.string(":"),
            ),
            "TO_DATE": format_time_lambda(exp.TsOrDsToDate, "hive"),
            "TO_JSON": exp.JSONFormat.from_arg_list,
            "UNBASE64": exp.FromBase64.from_arg_list,
            "UNIX_TIMESTAMP": format_time_lambda(exp.StrToUnix, "hive", True),
            "YEAR": lambda args: exp.Year(this=exp.TsOrDsToDate.from_arg_list(args)),
        }

        NO_PAREN_FUNCTION_PARSERS = {
            **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
            "TRANSFORM": lambda self: self._parse_transform(),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "WITH SERDEPROPERTIES": lambda self: exp.SerdeProperties(
                expressions=self._parse_wrapped_csv(self._parse_property)
            ),
        }

        def _parse_transform(self) -> t.Optional[exp.Transform | exp.QueryTransform]:
            if not self._match(TokenType.L_PAREN, advance=False):
                self._retreat(self._index - 1)
                return None

            args = self._parse_wrapped_csv(self._parse_lambda)
            row_format_before = self._parse_row_format(match_row=True)

            record_writer = None
            if self._match_text_seq("RECORDWRITER"):
                record_writer = self._parse_string()

            if not self._match(TokenType.USING):
                return exp.Transform.from_arg_list(args)

            command_script = self._parse_string()

            self._match(TokenType.ALIAS)
            schema = self._parse_schema()

            row_format_after = self._parse_row_format(match_row=True)
            record_reader = None
            if self._match_text_seq("RECORDREADER"):
                record_reader = self._parse_string()

            return self.expression(
                exp.QueryTransform,
                expressions=args,
                command_script=command_script,
                schema=schema,
                row_format_before=row_format_before,
                record_writer=record_writer,
                row_format_after=row_format_after,
                record_reader=record_reader,
            )

        def _parse_types(
            self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
        ) -> t.Optional[exp.Expression]:
            """
            Spark (and most likely Hive) treats casts to CHAR(length) and VARCHAR(length) as casts to
            STRING in all contexts except for schema definitions. For example, this is in Spark v3.4.0:

            spark-sql (default)> select cast(1234 as varchar(2));
            23/06/06 15:51:18 WARN CharVarcharUtils: The Spark cast operator does not support
            char/varchar type and simply treats them as string type. Please use string type
            directly to avoid confusion. Otherwise, you can set spark.sql.legacy.charVarcharAsString
            to true, so that Spark treat them as string type as same as Spark 3.0 and earlier

            1234
            Time taken: 4.265 seconds, Fetched 1 row(s)

            This shows that Spark doesn't truncate the value into '12', which is inconsistent with
            what other dialects (e.g. postgres) do, so we need to drop the length to transpile correctly.

            Reference: https://spark.apache.org/docs/latest/sql-ref-datatypes.html
            """
            this = super()._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )

            if this and not schema:
                return this.transform(
                    lambda node: node.replace(exp.DataType.build("text"))
                    if isinstance(node, exp.DataType) and node.is_type("char", "varchar")
                    else node,
                    copy=False,
                )

            return this

        def _parse_partition_and_order(
            self,
        ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
            return (
                self._parse_csv(self._parse_conjunction)
                if self._match_set({TokenType.PARTITION_BY, TokenType.DISTRIBUTE_BY})
                else [],
                super()._parse_order(skip_order_token=self._match(TokenType.SORT_BY)),
            )

    class Generator(generator.Generator):
        LIMIT_FETCH = "LIMIT"
        TABLESAMPLE_WITH_METHOD = False
        TABLESAMPLE_SIZE_IS_PERCENT = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        INDEX_ON = "ON TABLE"
        EXTRACT_ALLOWS_QUOTES = False
        NVL2_SUPPORTED = False

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIT: "BOOLEAN",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIME: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.VARBINARY: "BINARY",
        }

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.Group: transforms.preprocess([transforms.unalias_group]),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_qualify,
                    transforms.eliminate_distinct_on,
                    transforms.unnest_to_explode,
                ]
            ),
            exp.Property: _property_sql,
            exp.AnyValue: rename_func("FIRST"),
            exp.ApproxDistinct: approx_count_distinct_sql,
            exp.ArrayConcat: rename_func("CONCAT"),
            exp.ArrayJoin: lambda self, e: self.func("CONCAT_WS", e.expression, e.this),
            exp.ArraySize: rename_func("SIZE"),
            exp.ArraySort: _array_sort_sql,
            exp.With: no_recursive_cte_sql,
            exp.DateAdd: _add_date_sql,
            exp.DateDiff: _date_diff_sql,
            exp.DateStrToDate: rename_func("TO_DATE"),
            exp.DateSub: _add_date_sql,
            exp.DateToDi: lambda self, e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Hive.DATEINT_FORMAT}) AS INT)",
            exp.DiToDate: lambda self, e: f"TO_DATE(CAST({self.sql(e, 'this')} AS STRING), {Hive.DATEINT_FORMAT})",
            exp.FileFormatProperty: lambda self, e: f"STORED AS {self.sql(e, 'this') if isinstance(e.this, exp.InputOutputFormat) else e.name.upper()}",
            exp.FromBase64: rename_func("UNBASE64"),
            exp.If: if_sql(),
            exp.ILike: no_ilike_sql,
            exp.IsNan: rename_func("ISNAN"),
            exp.JSONExtract: rename_func("GET_JSON_OBJECT"),
            exp.JSONExtractScalar: rename_func("GET_JSON_OBJECT"),
            exp.JSONFormat: _json_format_sql,
            exp.Left: left_to_substring_sql,
            exp.Map: var_map_sql,
            exp.Max: max_or_greatest,
            exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
            exp.Min: min_or_least,
            exp.MonthsBetween: lambda self, e: self.func("MONTHS_BETWEEN", e.this, e.expression),
            exp.NotNullColumnConstraint: lambda self, e: ""
            if e.args.get("allow_null")
            else "NOT NULL",
            exp.VarMap: var_map_sql,
            exp.Create: create_with_partitions_sql,
            exp.Quantile: rename_func("PERCENTILE"),
            exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"),
            exp.RegexpExtract: regexp_extract_sql,
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"),
            exp.RegexpSplit: rename_func("SPLIT"),
            exp.Right: right_to_substring_sql,
            exp.SafeDivide: no_safe_divide_sql,
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.SetAgg: rename_func("COLLECT_SET"),
            exp.Split: lambda self, e: f"SPLIT({self.sql(e, 'this')}, CONCAT('\\\\Q', {self.sql(e, 'expression')}))",
            exp.StrPosition: strposition_to_locate_sql,
            exp.StrToDate: _str_to_date_sql,
            exp.StrToTime: _str_to_time_sql,
            exp.StrToUnix: _str_to_unix_sql,
            exp.StructExtract: struct_extract_sql,
            exp.TimeStrToDate: rename_func("TO_DATE"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"),
            exp.TimeToStr: _time_to_str,
            exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"),
            exp.ToBase64: rename_func("BASE64"),
            exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS STRING), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: lambda self, e: f"DATE_ADD({self.sql(e, 'this')}, {self.sql(e, 'expression')})",
            exp.TsOrDsToDate: _to_date_sql,
            exp.TryCast: no_trycast_sql,
            exp.UnixToStr: lambda self, e: self.func(
                "FROM_UNIXTIME", e.this, time_format("hive")(self, e)
            ),
            exp.UnixToTime: rename_func("FROM_UNIXTIME"),
            exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'this')}",
            exp.SerdeProperties: lambda self, e: self.properties(e, prefix="WITH SERDEPROPERTIES"),
            exp.NumberToStr: rename_func("FORMAT_NUMBER"),
            exp.LastDateOfMonth: rename_func("LAST_DAY"),
            exp.National: lambda self, e: self.national_sql(e, prefix=""),
            exp.ClusteredColumnConstraint: lambda self, e: f"({self.expressions(e, 'this', indent=False)})",
            exp.NonClusteredColumnConstraint: lambda self, e: f"({self.expressions(e, 'this', indent=False)})",
            exp.NotForReplicationColumnConstraint: lambda self, e: "",
            exp.OnProperty: lambda self, e: "",
            exp.PrimaryKeyColumnConstraint: lambda self, e: "PRIMARY KEY",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.FileFormatProperty: exp.Properties.Location.POST_SCHEMA,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        def parameter_sql(self, expression: exp.Parameter) -> str:
            this = self.sql(expression, "this")
            parent = expression.parent

            if isinstance(parent, exp.EQ) and isinstance(parent.parent, exp.SetItem):
                # We need to produce SET key = value instead of SET ${key} = value
                return this

            return f"${{{this}}}"

        def schema_sql(self, expression: exp.Schema) -> str:
            expression = expression.copy()

            for ordered in expression.find_all(exp.Ordered):
                if ordered.args.get("desc") is False:
                    ordered.set("desc", None)

            return super().schema_sql(expression)

        def constraint_sql(self, expression: exp.Constraint) -> str:
            expression = expression.copy()

            for prop in list(expression.find_all(exp.Properties)):
                prop.pop()

            this = self.sql(expression, "this")
            expressions = self.expressions(expression, sep=" ", flat=True)
            return f"CONSTRAINT {this} {expressions}"

        def rowformatserdeproperty_sql(self, expression: exp.RowFormatSerdeProperty) -> str:
            serde_props = self.sql(expression, "serde_properties")
            serde_props = f" {serde_props}" if serde_props else ""
            return f"ROW FORMAT SERDE {self.sql(expression, 'this')}{serde_props}"

        def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
            return self.func(
                "COLLECT_LIST",
                expression.this.this if isinstance(expression.this, exp.Order) else expression.this,
            )

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, prefix=self.seg("TBLPROPERTIES"))

        def datatype_sql(self, expression: exp.DataType) -> str:
            if (
                expression.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR)
                and not expression.expressions
            ):
                expression = exp.DataType.build("text")
            elif expression.this in exp.DataType.TEMPORAL_TYPES:
                expression = exp.DataType.build(expression.this)
            elif expression.is_type("float"):
                size_expression = expression.find(exp.DataTypeParam)
                if size_expression:
                    size = int(size_expression.name)
                    expression = (
                        exp.DataType.build("float") if size <= 32 else exp.DataType.build("double")
                    )

            return super().datatype_sql(expression)

        def version_sql(self, expression: exp.Version) -> str:
            sql = super().version_sql(expression)
            return sql.replace("FOR ", "", 1)
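The dialect is normally driven through the top-level sqlglot API rather than by importing this module directly. A minimal, hedged sketch (the table and column names are made up for illustration):

import sqlglot

# Hive SQL in: GET_JSON_OBJECT is parsed into exp.JSONExtractScalar (see
# Parser.FUNCTIONS above) and re-emitted for the target dialect.
print(sqlglot.transpile("SELECT GET_JSON_OBJECT(payload, '$.id') FROM events", read="hive", write="spark")[0])

# Hive SQL out: TRY_CAST has no Hive equivalent, so no_trycast_sql should emit a plain CAST.
print(sqlglot.transpile("SELECT TRY_CAST(x AS INT) FROM t", read="duckdb", write="hive")[0])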
DATE_DELTA_INTERVAL = {'YEAR': ('ADD_MONTHS', 12), 'MONTH': ('ADD_MONTHS', 1), 'QUARTER': ('ADD_MONTHS', 3), 'WEEK': ('DATE_ADD', 7), 'DAY': ('DATE_ADD', 1)}
TIME_DIFF_FACTOR = {'MILLISECOND': ' * 1000', 'SECOND': '', 'MINUTE': ' / 60', 'HOUR': ' / 3600'}
DIFF_MONTH_SWITCH = ('YEAR', 'QUARTER', 'MONTH')
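These tables drive _add_date_sql and _date_diff_sql above: units with no native Hive function are rewritten through ADD_MONTHS/DATE_ADD with a multiplier, and sub-day DATEDIFF units become scaled UNIX_TIMESTAMP differences. A small sketch built directly on the expression API (column names are illustrative):

from sqlglot import exp

# WEEK maps to ("DATE_ADD", 7) in DATE_DELTA_INTERVAL, so the literal increment
# is multiplied out; expected output along the lines of DATE_ADD(d, 14).
week_add = exp.DateAdd(
    this=exp.column("d"),
    expression=exp.Literal.number(2),
    unit=exp.Literal.string("WEEK"),
)
print(week_add.sql(dialect="hive"))

# MINUTE hits TIME_DIFF_FACTOR, so the diff should render as
# (UNIX_TIMESTAMP(b) - UNIX_TIMESTAMP(a)) / 60.
minute_diff = exp.DateDiff(
    this=exp.column("b"),
    expression=exp.column("a"),
    unit=exp.Literal.string("MINUTE"),
)
print(minute_diff.sql(dialect="hive"))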
class Hive(sqlglot.dialects.dialect.Dialect):
TIME_MAPPING: Dict[str, str] =
{'y': '%Y', 'Y': '%Y', 'YYYY': '%Y', 'yyyy': '%Y', 'YY': '%y', 'yy': '%y', 'MMMM': '%B', 'MMM': '%b', 'MM': '%m', 'M': '%-m', 'dd': '%d', 'd': '%-d', 'HH': '%H', 'H': '%-H', 'hh': '%I', 'h': '%-I', 'mm': '%M', 'm': '%-M', 'ss': '%S', 's': '%-S', 'SSSSSS': '%f', 'a': '%p', 'DD': '%j', 'D': '%-j', 'E': '%a', 'EE': '%a', 'EEE': '%a', 'EEEE': '%A'}
tokenizer_class =
<class 'Hive.Tokenizer'>
parser_class =
<class 'Hive.Parser'>
generator_class =
<class 'Hive.Generator'>
TIME_TRIE: Dict =
{'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
FORMAT_TRIE: Dict =
{'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
INVERSE_TIME_MAPPING: Dict[str, str] =
{'%Y': 'yyyy', '%y': 'yy', '%B': 'MMMM', '%b': 'MMM', '%m': 'MM', '%-m': 'M', '%d': 'dd', '%-d': 'd', '%H': 'HH', '%-H': 'H', '%I': 'hh', '%-I': 'h', '%M': 'mm', '%-M': 'm', '%S': 'ss', '%-S': 's', '%f': 'SSSSSS', '%p': 'a', '%j': 'DD', '%-j': 'D', '%a': 'EEE', '%A': 'EEEE'}
INVERSE_TIME_TRIE: Dict =
{'%': {'Y': {0: True}, 'y': {0: True}, 'B': {0: True}, 'b': {0: True}, 'm': {0: True}, '-': {'m': {0: True}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'j': {0: True}}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'f': {0: True}, 'p': {0: True}, 'j': {0: True}, 'a': {0: True}, 'A': {0: True}}}
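TIME_MAPPING translates Hive/Java SimpleDateFormat tokens into the internal strftime-style tokens (and INVERSE_TIME_MAPPING maps them back), which is what lets format strings move between dialects. A hedged example, assuming an illustrative column ts:

import sqlglot

# 'yyyy-MM-dd HH:mm:ss' should be normalized through TIME_MAPPING and re-emitted
# in DuckDB's strftime notation ('%Y-%m-%d %H:%M:%S').
print(sqlglot.transpile("SELECT DATE_FORMAT(ts, 'yyyy-MM-dd HH:mm:ss') FROM t", read="hive", write="duckdb")[0])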
Inherited Members
- sqlglot.dialects.dialect.Dialect
- INDEX_OFFSET
- UNNEST_COLUMN_ONLY
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_SEMI_ANTI_JOIN
- NORMALIZE_FUNCTIONS
- NULL_ORDERING
- FORMAT_MAPPING
- PSEUDOCOLUMNS
- get_or_raise
- format_time
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
class Hive.Tokenizer(sqlglot.tokens.Tokenizer):
KEYWORDS =
{'{%': <TokenType.BLOCK_START: 'BLOCK_START'>, '{%+': <TokenType.BLOCK_START: 'BLOCK_START'>, '{%-': <TokenType.BLOCK_START: 'BLOCK_START'>, '%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '+%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '-%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '{{+': <TokenType.BLOCK_START: 'BLOCK_START'>, '{{-': <TokenType.BLOCK_START: 'BLOCK_START'>, '+}}': <TokenType.BLOCK_END: 'BLOCK_END'>, '-}}': <TokenType.BLOCK_END: 'BLOCK_END'>, '/*+': <TokenType.HINT: 'HINT'>, '==': <TokenType.EQ: 'EQ'>, '::': <TokenType.DCOLON: 'DCOLON'>, '||': <TokenType.DPIPE: 'DPIPE'>, '>=': <TokenType.GTE: 'GTE'>, '<=': <TokenType.LTE: 'LTE'>, '<>': <TokenType.NEQ: 'NEQ'>, '!=': <TokenType.NEQ: 'NEQ'>, '<=>': <TokenType.NULLSAFE_EQ: 'NULLSAFE_EQ'>, '->': <TokenType.ARROW: 'ARROW'>, '->>': <TokenType.DARROW: 'DARROW'>, '=>': <TokenType.FARROW: 'FARROW'>, '#>': <TokenType.HASH_ARROW: 'HASH_ARROW'>, '#>>': <TokenType.DHASH_ARROW: 'DHASH_ARROW'>, '<->': <TokenType.LR_ARROW: 'LR_ARROW'>, '&&': <TokenType.DAMP: 'DAMP'>, '??': <TokenType.DQMARK: 'DQMARK'>, 'ALL': <TokenType.ALL: 'ALL'>, 'ALWAYS': <TokenType.ALWAYS: 'ALWAYS'>, 'AND': <TokenType.AND: 'AND'>, 'ANTI': <TokenType.ANTI: 'ANTI'>, 'ANY': <TokenType.ANY: 'ANY'>, 'ASC': <TokenType.ASC: 'ASC'>, 'AS': <TokenType.ALIAS: 'ALIAS'>, 'ASOF': <TokenType.ASOF: 'ASOF'>, 'AUTOINCREMENT': <TokenType.AUTO_INCREMENT: 'AUTO_INCREMENT'>, 'AUTO_INCREMENT': <TokenType.AUTO_INCREMENT: 'AUTO_INCREMENT'>, 'BEGIN': <TokenType.BEGIN: 'BEGIN'>, 'BETWEEN': <TokenType.BETWEEN: 'BETWEEN'>, 'CACHE': <TokenType.CACHE: 'CACHE'>, 'UNCACHE': <TokenType.UNCACHE: 'UNCACHE'>, 'CASE': <TokenType.CASE: 'CASE'>, 'CHARACTER SET': <TokenType.CHARACTER_SET: 'CHARACTER_SET'>, 'CLUSTER BY': <TokenType.CLUSTER_BY: 'CLUSTER_BY'>, 'COLLATE': <TokenType.COLLATE: 'COLLATE'>, 'COLUMN': <TokenType.COLUMN: 'COLUMN'>, 'COMMIT': <TokenType.COMMIT: 'COMMIT'>, 'CONNECT BY': <TokenType.CONNECT_BY: 'CONNECT_BY'>, 'CONSTRAINT': <TokenType.CONSTRAINT: 'CONSTRAINT'>, 'CREATE': <TokenType.CREATE: 'CREATE'>, 'CROSS': <TokenType.CROSS: 'CROSS'>, 'CUBE': <TokenType.CUBE: 'CUBE'>, 'CURRENT_DATE': <TokenType.CURRENT_DATE: 'CURRENT_DATE'>, 'CURRENT_TIME': <TokenType.CURRENT_TIME: 'CURRENT_TIME'>, 'CURRENT_TIMESTAMP': <TokenType.CURRENT_TIMESTAMP: 'CURRENT_TIMESTAMP'>, 'CURRENT_USER': <TokenType.CURRENT_USER: 'CURRENT_USER'>, 'DATABASE': <TokenType.DATABASE: 'DATABASE'>, 'DEFAULT': <TokenType.DEFAULT: 'DEFAULT'>, 'DELETE': <TokenType.DELETE: 'DELETE'>, 'DESC': <TokenType.DESC: 'DESC'>, 'DESCRIBE': <TokenType.DESCRIBE: 'DESCRIBE'>, 'DISTINCT': <TokenType.DISTINCT: 'DISTINCT'>, 'DISTRIBUTE BY': <TokenType.DISTRIBUTE_BY: 'DISTRIBUTE_BY'>, 'DIV': <TokenType.DIV: 'DIV'>, 'DROP': <TokenType.DROP: 'DROP'>, 'ELSE': <TokenType.ELSE: 'ELSE'>, 'END': <TokenType.END: 'END'>, 'ESCAPE': <TokenType.ESCAPE: 'ESCAPE'>, 'EXCEPT': <TokenType.EXCEPT: 'EXCEPT'>, 'EXECUTE': <TokenType.EXECUTE: 'EXECUTE'>, 'EXISTS': <TokenType.EXISTS: 'EXISTS'>, 'FALSE': <TokenType.FALSE: 'FALSE'>, 'FETCH': <TokenType.FETCH: 'FETCH'>, 'FILTER': <TokenType.FILTER: 'FILTER'>, 'FIRST': <TokenType.FIRST: 'FIRST'>, 'FULL': <TokenType.FULL: 'FULL'>, 'FUNCTION': <TokenType.FUNCTION: 'FUNCTION'>, 'FOR': <TokenType.FOR: 'FOR'>, 'FOREIGN KEY': <TokenType.FOREIGN_KEY: 'FOREIGN_KEY'>, 'FORMAT': <TokenType.FORMAT: 'FORMAT'>, 'FROM': <TokenType.FROM: 'FROM'>, 'GEOGRAPHY': <TokenType.GEOGRAPHY: 'GEOGRAPHY'>, 'GEOMETRY': <TokenType.GEOMETRY: 'GEOMETRY'>, 'GLOB': <TokenType.GLOB: 'GLOB'>, 'GROUP BY': <TokenType.GROUP_BY: 'GROUP_BY'>, 'GROUPING SETS': 
<TokenType.GROUPING_SETS: 'GROUPING_SETS'>, 'HAVING': <TokenType.HAVING: 'HAVING'>, 'ILIKE': <TokenType.ILIKE: 'ILIKE'>, 'IN': <TokenType.IN: 'IN'>, 'INDEX': <TokenType.INDEX: 'INDEX'>, 'INET': <TokenType.INET: 'INET'>, 'INNER': <TokenType.INNER: 'INNER'>, 'INSERT': <TokenType.INSERT: 'INSERT'>, 'INTERVAL': <TokenType.INTERVAL: 'INTERVAL'>, 'INTERSECT': <TokenType.INTERSECT: 'INTERSECT'>, 'INTO': <TokenType.INTO: 'INTO'>, 'IS': <TokenType.IS: 'IS'>, 'ISNULL': <TokenType.ISNULL: 'ISNULL'>, 'JOIN': <TokenType.JOIN: 'JOIN'>, 'KEEP': <TokenType.KEEP: 'KEEP'>, 'KILL': <TokenType.KILL: 'KILL'>, 'LATERAL': <TokenType.LATERAL: 'LATERAL'>, 'LEFT': <TokenType.LEFT: 'LEFT'>, 'LIKE': <TokenType.LIKE: 'LIKE'>, 'LIMIT': <TokenType.LIMIT: 'LIMIT'>, 'LOAD': <TokenType.LOAD: 'LOAD'>, 'LOCK': <TokenType.LOCK: 'LOCK'>, 'MERGE': <TokenType.MERGE: 'MERGE'>, 'NATURAL': <TokenType.NATURAL: 'NATURAL'>, 'NEXT': <TokenType.NEXT: 'NEXT'>, 'NOT': <TokenType.NOT: 'NOT'>, 'NOTNULL': <TokenType.NOTNULL: 'NOTNULL'>, 'NULL': <TokenType.NULL: 'NULL'>, 'OBJECT': <TokenType.OBJECT: 'OBJECT'>, 'OFFSET': <TokenType.OFFSET: 'OFFSET'>, 'ON': <TokenType.ON: 'ON'>, 'OR': <TokenType.OR: 'OR'>, 'XOR': <TokenType.XOR: 'XOR'>, 'ORDER BY': <TokenType.ORDER_BY: 'ORDER_BY'>, 'ORDINALITY': <TokenType.ORDINALITY: 'ORDINALITY'>, 'OUTER': <TokenType.OUTER: 'OUTER'>, 'OVER': <TokenType.OVER: 'OVER'>, 'OVERLAPS': <TokenType.OVERLAPS: 'OVERLAPS'>, 'OVERWRITE': <TokenType.OVERWRITE: 'OVERWRITE'>, 'PARTITION': <TokenType.PARTITION: 'PARTITION'>, 'PARTITION BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PARTITIONED BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PARTITIONED_BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PERCENT': <TokenType.PERCENT: 'PERCENT'>, 'PIVOT': <TokenType.PIVOT: 'PIVOT'>, 'PRAGMA': <TokenType.PRAGMA: 'PRAGMA'>, 'PRIMARY KEY': <TokenType.PRIMARY_KEY: 'PRIMARY_KEY'>, 'PROCEDURE': <TokenType.PROCEDURE: 'PROCEDURE'>, 'QUALIFY': <TokenType.QUALIFY: 'QUALIFY'>, 'RANGE': <TokenType.RANGE: 'RANGE'>, 'RECURSIVE': <TokenType.RECURSIVE: 'RECURSIVE'>, 'REGEXP': <TokenType.RLIKE: 'RLIKE'>, 'REPLACE': <TokenType.REPLACE: 'REPLACE'>, 'RETURNING': <TokenType.RETURNING: 'RETURNING'>, 'REFERENCES': <TokenType.REFERENCES: 'REFERENCES'>, 'RIGHT': <TokenType.RIGHT: 'RIGHT'>, 'RLIKE': <TokenType.RLIKE: 'RLIKE'>, 'ROLLBACK': <TokenType.ROLLBACK: 'ROLLBACK'>, 'ROLLUP': <TokenType.ROLLUP: 'ROLLUP'>, 'ROW': <TokenType.ROW: 'ROW'>, 'ROWS': <TokenType.ROWS: 'ROWS'>, 'SCHEMA': <TokenType.SCHEMA: 'SCHEMA'>, 'SELECT': <TokenType.SELECT: 'SELECT'>, 'SEMI': <TokenType.SEMI: 'SEMI'>, 'SET': <TokenType.SET: 'SET'>, 'SETTINGS': <TokenType.SETTINGS: 'SETTINGS'>, 'SHOW': <TokenType.SHOW: 'SHOW'>, 'SIMILAR TO': <TokenType.SIMILAR_TO: 'SIMILAR_TO'>, 'SOME': <TokenType.SOME: 'SOME'>, 'SORT BY': <TokenType.SORT_BY: 'SORT_BY'>, 'START WITH': <TokenType.START_WITH: 'START_WITH'>, 'TABLE': <TokenType.TABLE: 'TABLE'>, 'TABLESAMPLE': <TokenType.TABLE_SAMPLE: 'TABLE_SAMPLE'>, 'TEMP': <TokenType.TEMPORARY: 'TEMPORARY'>, 'TEMPORARY': <TokenType.TEMPORARY: 'TEMPORARY'>, 'THEN': <TokenType.THEN: 'THEN'>, 'TRUE': <TokenType.TRUE: 'TRUE'>, 'UNION': <TokenType.UNION: 'UNION'>, 'UNKNOWN': <TokenType.UNKNOWN: 'UNKNOWN'>, 'UNNEST': <TokenType.UNNEST: 'UNNEST'>, 'UNPIVOT': <TokenType.UNPIVOT: 'UNPIVOT'>, 'UPDATE': <TokenType.UPDATE: 'UPDATE'>, 'USE': <TokenType.USE: 'USE'>, 'USING': <TokenType.USING: 'USING'>, 'UUID': <TokenType.UUID: 'UUID'>, 'VALUES': <TokenType.VALUES: 'VALUES'>, 'VIEW': <TokenType.VIEW: 'VIEW'>, 'VOLATILE': <TokenType.VOLATILE: 'VOLATILE'>, 'WHEN': 
<TokenType.WHEN: 'WHEN'>, 'WHERE': <TokenType.WHERE: 'WHERE'>, 'WINDOW': <TokenType.WINDOW: 'WINDOW'>, 'WITH': <TokenType.WITH: 'WITH'>, 'APPLY': <TokenType.APPLY: 'APPLY'>, 'ARRAY': <TokenType.ARRAY: 'ARRAY'>, 'BIT': <TokenType.BIT: 'BIT'>, 'BOOL': <TokenType.BOOLEAN: 'BOOLEAN'>, 'BOOLEAN': <TokenType.BOOLEAN: 'BOOLEAN'>, 'BYTE': <TokenType.TINYINT: 'TINYINT'>, 'MEDIUMINT': <TokenType.MEDIUMINT: 'MEDIUMINT'>, 'TINYINT': <TokenType.TINYINT: 'TINYINT'>, 'SHORT': <TokenType.SMALLINT: 'SMALLINT'>, 'SMALLINT': <TokenType.SMALLINT: 'SMALLINT'>, 'INT128': <TokenType.INT128: 'INT128'>, 'INT2': <TokenType.SMALLINT: 'SMALLINT'>, 'INTEGER': <TokenType.INT: 'INT'>, 'INT': <TokenType.INT: 'INT'>, 'INT4': <TokenType.INT: 'INT'>, 'LONG': <TokenType.BIGINT: 'BIGINT'>, 'BIGINT': <TokenType.BIGINT: 'BIGINT'>, 'INT8': <TokenType.BIGINT: 'BIGINT'>, 'DEC': <TokenType.DECIMAL: 'DECIMAL'>, 'DECIMAL': <TokenType.DECIMAL: 'DECIMAL'>, 'BIGDECIMAL': <TokenType.BIGDECIMAL: 'BIGDECIMAL'>, 'BIGNUMERIC': <TokenType.BIGDECIMAL: 'BIGDECIMAL'>, 'MAP': <TokenType.MAP: 'MAP'>, 'NULLABLE': <TokenType.NULLABLE: 'NULLABLE'>, 'NUMBER': <TokenType.DECIMAL: 'DECIMAL'>, 'NUMERIC': <TokenType.DECIMAL: 'DECIMAL'>, 'FIXED': <TokenType.DECIMAL: 'DECIMAL'>, 'REAL': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT4': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT8': <TokenType.DOUBLE: 'DOUBLE'>, 'DOUBLE': <TokenType.DOUBLE: 'DOUBLE'>, 'DOUBLE PRECISION': <TokenType.DOUBLE: 'DOUBLE'>, 'JSON': <TokenType.JSON: 'JSON'>, 'CHAR': <TokenType.CHAR: 'CHAR'>, 'CHARACTER': <TokenType.CHAR: 'CHAR'>, 'NCHAR': <TokenType.NCHAR: 'NCHAR'>, 'VARCHAR': <TokenType.VARCHAR: 'VARCHAR'>, 'VARCHAR2': <TokenType.VARCHAR: 'VARCHAR'>, 'NVARCHAR': <TokenType.NVARCHAR: 'NVARCHAR'>, 'NVARCHAR2': <TokenType.NVARCHAR: 'NVARCHAR'>, 'STR': <TokenType.TEXT: 'TEXT'>, 'STRING': <TokenType.TEXT: 'TEXT'>, 'TEXT': <TokenType.TEXT: 'TEXT'>, 'LONGTEXT': <TokenType.LONGTEXT: 'LONGTEXT'>, 'MEDIUMTEXT': <TokenType.MEDIUMTEXT: 'MEDIUMTEXT'>, 'TINYTEXT': <TokenType.TINYTEXT: 'TINYTEXT'>, 'CLOB': <TokenType.TEXT: 'TEXT'>, 'LONGVARCHAR': <TokenType.TEXT: 'TEXT'>, 'BINARY': <TokenType.BINARY: 'BINARY'>, 'BLOB': <TokenType.VARBINARY: 'VARBINARY'>, 'LONGBLOB': <TokenType.LONGBLOB: 'LONGBLOB'>, 'MEDIUMBLOB': <TokenType.MEDIUMBLOB: 'MEDIUMBLOB'>, 'TINYBLOB': <TokenType.TINYBLOB: 'TINYBLOB'>, 'BYTEA': <TokenType.VARBINARY: 'VARBINARY'>, 'VARBINARY': <TokenType.VARBINARY: 'VARBINARY'>, 'TIME': <TokenType.TIME: 'TIME'>, 'TIMETZ': <TokenType.TIMETZ: 'TIMETZ'>, 'TIMESTAMP': <TokenType.TIMESTAMP: 'TIMESTAMP'>, 'TIMESTAMPTZ': <TokenType.TIMESTAMPTZ: 'TIMESTAMPTZ'>, 'TIMESTAMPLTZ': <TokenType.TIMESTAMPLTZ: 'TIMESTAMPLTZ'>, 'DATE': <TokenType.DATE: 'DATE'>, 'DATETIME': <TokenType.DATETIME: 'DATETIME'>, 'INT4RANGE': <TokenType.INT4RANGE: 'INT4RANGE'>, 'INT4MULTIRANGE': <TokenType.INT4MULTIRANGE: 'INT4MULTIRANGE'>, 'INT8RANGE': <TokenType.INT8RANGE: 'INT8RANGE'>, 'INT8MULTIRANGE': <TokenType.INT8MULTIRANGE: 'INT8MULTIRANGE'>, 'NUMRANGE': <TokenType.NUMRANGE: 'NUMRANGE'>, 'NUMMULTIRANGE': <TokenType.NUMMULTIRANGE: 'NUMMULTIRANGE'>, 'TSRANGE': <TokenType.TSRANGE: 'TSRANGE'>, 'TSMULTIRANGE': <TokenType.TSMULTIRANGE: 'TSMULTIRANGE'>, 'TSTZRANGE': <TokenType.TSTZRANGE: 'TSTZRANGE'>, 'TSTZMULTIRANGE': <TokenType.TSTZMULTIRANGE: 'TSTZMULTIRANGE'>, 'DATERANGE': <TokenType.DATERANGE: 'DATERANGE'>, 'DATEMULTIRANGE': <TokenType.DATEMULTIRANGE: 'DATEMULTIRANGE'>, 'UNIQUE': <TokenType.UNIQUE: 'UNIQUE'>, 'STRUCT': <TokenType.STRUCT: 'STRUCT'>, 'VARIANT': <TokenType.VARIANT: 'VARIANT'>, 'ALTER': 
<TokenType.ALTER: 'ALTER'>, 'ANALYZE': <TokenType.COMMAND: 'COMMAND'>, 'CALL': <TokenType.COMMAND: 'COMMAND'>, 'COMMENT': <TokenType.COMMENT: 'COMMENT'>, 'COPY': <TokenType.COMMAND: 'COMMAND'>, 'EXPLAIN': <TokenType.COMMAND: 'COMMAND'>, 'GRANT': <TokenType.COMMAND: 'COMMAND'>, 'OPTIMIZE': <TokenType.COMMAND: 'COMMAND'>, 'PREPARE': <TokenType.COMMAND: 'COMMAND'>, 'TRUNCATE': <TokenType.COMMAND: 'COMMAND'>, 'VACUUM': <TokenType.COMMAND: 'COMMAND'>, 'USER-DEFINED': <TokenType.USERDEFINED: 'USERDEFINED'>, 'FOR VERSION': <TokenType.VERSION_SNAPSHOT: 'VERSION_SNAPSHOT'>, 'FOR TIMESTAMP': <TokenType.TIMESTAMP_SNAPSHOT: 'TIMESTAMP_SNAPSHOT'>, 'ADD ARCHIVE': <TokenType.COMMAND: 'COMMAND'>, 'ADD ARCHIVES': <TokenType.COMMAND: 'COMMAND'>, 'ADD FILE': <TokenType.COMMAND: 'COMMAND'>, 'ADD FILES': <TokenType.COMMAND: 'COMMAND'>, 'ADD JAR': <TokenType.COMMAND: 'COMMAND'>, 'ADD JARS': <TokenType.COMMAND: 'COMMAND'>, 'MSCK REPAIR': <TokenType.COMMAND: 'COMMAND'>, 'REFRESH': <TokenType.COMMAND: 'COMMAND'>, 'WITH SERDEPROPERTIES': <TokenType.SERDE_PROPERTIES: 'SERDE_PROPERTIES'>, 'TIMESTAMP AS OF': <TokenType.TIMESTAMP_SNAPSHOT: 'TIMESTAMP_SNAPSHOT'>, 'VERSION AS OF': <TokenType.VERSION_SNAPSHOT: 'VERSION_SNAPSHOT'>}
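The Hive-specific keyword additions above mostly mark statements like MSCK REPAIR as opaque commands, while NUMERIC_LITERALS (in the source above) turns typed literal suffixes into casts. A rough sketch of both behaviors, hedged as to the exact output:

import sqlglot

# MSCK REPAIR maps to TokenType.COMMAND, so the statement should round-trip as a raw command.
print(sqlglot.transpile("MSCK REPAIR TABLE db.t", read="hive", write="hive")[0])

# Suffixed numerics such as 10Y or 7L are expected to come back as explicit casts
# to the mapped types (TINYINT, BIGINT, ...), per the NUMERIC_LITERALS table.
print(sqlglot.transpile("SELECT 10Y, 7L", read="hive", write="hive")[0])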
class Hive.Parser(sqlglot.parser.Parser):
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
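A hedged sketch of driving the Hive tokenizer and parser directly with the constructor arguments documented above (ErrorLevel.WARN is chosen purely for illustration):

from sqlglot.dialects.hive import Hive
from sqlglot.errors import ErrorLevel

dialect = Hive()
tokens = dialect.tokenize("SELECT COLLECT_SET(x) FROM t")

# Constructor options are forwarded to Hive.Parser; WARN logs problems instead of raising.
hive_parser = dialect.parser(error_level=ErrorLevel.WARN)
expressions = hive_parser.parse(tokens)
print(expressions[0].sql(dialect="hive"))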
FUNCTIONS =
{'ABS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Abs'>>, 'ANY_VALUE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.AnyValue'>>, 'APPROX_DISTINCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxDistinct'>>, 'APPROX_COUNT_DISTINCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxDistinct'>>, 'APPROX_QUANTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxQuantile'>>, 'ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Array'>>, 'ARRAY_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAgg'>>, 'ARRAY_ALL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAll'>>, 'ARRAY_ANY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAny'>>, 'ARRAY_CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayConcat'>>, 'ARRAY_CAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayConcat'>>, 'ARRAY_CONTAINS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayContains'>>, 'FILTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayFilter'>>, 'ARRAY_FILTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayFilter'>>, 'ARRAY_JOIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayJoin'>>, 'ARRAY_SIZE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySize'>>, 'ARRAY_SORT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySort'>>, 'ARRAY_SUM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySum'>>, 'ARRAY_UNION_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayUnionAgg'>>, 'AVG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Avg'>>, 'CASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Case'>>, 'CAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Cast'>>, 'CAST_TO_STR_TYPE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CastToStrType'>>, 'CEIL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ceil'>>, 'CEILING': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ceil'>>, 'COALESCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'IFNULL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'NVL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Concat'>>, 'CONCAT_WS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ConcatWs'>>, 'COUNT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Count'>>, 'COUNT_IF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CountIf'>>, 'CURRENT_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentDate'>>, 'CURRENT_DATETIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentDatetime'>>, 'CURRENT_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentTime'>>, 'CURRENT_TIMESTAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentTimestamp'>>, 'CURRENT_USER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentUser'>>, 'DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Date'>>, 'DATE_ADD': <function Hive.Parser.<lambda>>, 'DATEDIFF': <function 
Hive.Parser.<lambda>>, 'DATE_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateDiff'>>, 'DATEFROMPARTS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateFromParts'>>, 'DATE_STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateStrToDate'>>, 'DATE_SUB': <function Hive.Parser.<lambda>>, 'DATE_TO_DATE_STR': <function Parser.<lambda>>, 'DATE_TO_DI': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateToDi'>>, 'DATE_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateTrunc'>>, 'DATETIME_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeAdd'>>, 'DATETIME_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeDiff'>>, 'DATETIME_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeSub'>>, 'DATETIME_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeTrunc'>>, 'DAY': <function Hive.Parser.<lambda>>, 'DAY_OF_MONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfMonth'>>, 'DAYOFMONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfMonth'>>, 'DAY_OF_WEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfWeek'>>, 'DAYOFWEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfWeek'>>, 'DAY_OF_YEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfYear'>>, 'DAYOFYEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfYear'>>, 'DECODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Decode'>>, 'DI_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DiToDate'>>, 'ENCODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Encode'>>, 'EXP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Exp'>>, 'EXPLODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Explode'>>, 'EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Extract'>>, 'FIRST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.First'>>, 'FLOOR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Floor'>>, 'FROM_BASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.FromBase'>>, 'FROM_BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.FromBase64'>>, 'GENERATE_SERIES': <bound method Func.from_arg_list of <class 'sqlglot.expressions.GenerateSeries'>>, 'GREATEST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Greatest'>>, 'GROUP_CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.GroupConcat'>>, 'HEX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Hex'>>, 'HLL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Hll'>>, 'IF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.If'>>, 'INITCAP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Initcap'>>, 'IS_NAN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.IsNan'>>, 'ISNAN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.IsNan'>>, 'J_S_O_N_ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONArray'>>, 'J_S_O_N_ARRAY_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONArrayAgg'>>, 'JSON_ARRAY_CONTAINS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONArrayContains'>>, 'JSONB_EXTRACT': <bound method 
Func.from_arg_list of <class 'sqlglot.expressions.JSONBExtract'>>, 'JSONB_EXTRACT_SCALAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONBExtractScalar'>>, 'JSON_EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONExtract'>>, 'JSON_EXTRACT_SCALAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONExtractScalar'>>, 'JSON_FORMAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONFormat'>>, 'J_S_O_N_OBJECT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONObject'>>, 'J_S_O_N_TABLE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONTable'>>, 'LAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Last'>>, 'LAST_DATE_OF_MONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LastDateOfMonth'>>, 'LEAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Least'>>, 'LEFT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Left'>>, 'LENGTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Length'>>, 'LEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Length'>>, 'LEVENSHTEIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Levenshtein'>>, 'LN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ln'>>, 'LOG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Log'>>, 'LOG10': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Log10'>>, 'LOG2': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Log2'>>, 'LOGICAL_AND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'BOOL_AND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'BOOLAND_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'LOGICAL_OR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'BOOL_OR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'BOOLOR_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'LOWER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Lower'>>, 'LCASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Lower'>>, 'MD5': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MD5'>>, 'MD5_DIGEST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MD5Digest'>>, 'MAP': <function parse_var_map>, 'MAP_FROM_ENTRIES': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MapFromEntries'>>, 'MATCH_AGAINST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MatchAgainst'>>, 'MAX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Max'>>, 'MIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Min'>>, 'MONTH': <function Hive.Parser.<lambda>>, 'MONTHS_BETWEEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MonthsBetween'>>, 'NEXT_VALUE_FOR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.NextValueFor'>>, 'NUMBER_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.NumberToStr'>>, 'NVL2': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Nvl2'>>, 'OPEN_J_S_O_N': <bound method Func.from_arg_list of <class 'sqlglot.expressions.OpenJSON'>>, 'PARAMETERIZED_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ParameterizedAgg'>>, 'PARSE_JSON': <bound method Func.from_arg_list of 
<class 'sqlglot.expressions.ParseJSON'>>, 'JSON_PARSE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ParseJSON'>>, 'PERCENTILE_CONT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.PercentileCont'>>, 'PERCENTILE_DISC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.PercentileDisc'>>, 'POSEXPLODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Posexplode'>>, 'POWER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Pow'>>, 'POW': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Pow'>>, 'QUANTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Quantile'>>, 'RANGE_N': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RangeN'>>, 'READ_CSV': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ReadCSV'>>, 'REDUCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Reduce'>>, 'REGEXP_EXTRACT': <function Hive.Parser.<lambda>>, 'REGEXP_I_LIKE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpILike'>>, 'REGEXP_LIKE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpLike'>>, 'REGEXP_REPLACE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpReplace'>>, 'REGEXP_SPLIT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpSplit'>>, 'REPEAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Repeat'>>, 'RIGHT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Right'>>, 'ROUND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Round'>>, 'ROW_NUMBER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RowNumber'>>, 'SHA': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SHA'>>, 'SHA1': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SHA'>>, 'SHA2': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SHA2'>>, 'SAFE_CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SafeConcat'>>, 'SAFE_DIVIDE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SafeDivide'>>, 'SET_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SetAgg'>>, 'SORT_ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SortArray'>>, 'SPLIT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpSplit'>>, 'SQRT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Sqrt'>>, 'STANDARD_HASH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StandardHash'>>, 'STAR_MAP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StarMap'>>, 'STARTS_WITH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StartsWith'>>, 'STARTSWITH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StartsWith'>>, 'STDDEV': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Stddev'>>, 'STDDEV_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StddevPop'>>, 'STDDEV_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StddevSamp'>>, 'STR_POSITION': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrPosition'>>, 'STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToDate'>>, 'STR_TO_MAP': <function Hive.Parser.<lambda>>, 'STR_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToTime'>>, 'STR_TO_UNIX': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.StrToUnix'>>, 'STRUCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Struct'>>, 'STRUCT_EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StructExtract'>>, 'STUFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Stuff'>>, 'INSERT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Stuff'>>, 'SUBSTRING': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Substring'>>, 'SUM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Sum'>>, 'TIME_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeAdd'>>, 'TIME_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeDiff'>>, 'TIME_STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToDate'>>, 'TIME_STR_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToTime'>>, 'TIME_STR_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToUnix'>>, 'TIME_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeSub'>>, 'TIME_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeToStr'>>, 'TIME_TO_TIME_STR': <function Parser.<lambda>>, 'TIME_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeToUnix'>>, 'TIME_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeTrunc'>>, 'TIMESTAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Timestamp'>>, 'TIMESTAMP_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampAdd'>>, 'TIMESTAMP_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampDiff'>>, 'TIMESTAMP_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampSub'>>, 'TIMESTAMP_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampTrunc'>>, 'TO_BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToBase64'>>, 'TO_CHAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToChar'>>, 'TRANSFORM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Transform'>>, 'TRIM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Trim'>>, 'TRY_CAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TryCast'>>, 'TS_OR_DI_TO_DI': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDiToDi'>>, 'TS_OR_DS_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsAdd'>>, 'TS_OR_DS_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsToDate'>>, 'TS_OR_DS_TO_DATE_STR': <function Parser.<lambda>>, 'UNHEX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Unhex'>>, 'UNIX_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToStr'>>, 'UNIX_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToTime'>>, 'UNIX_TO_TIME_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToTimeStr'>>, 'UPPER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Upper'>>, 'UCASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Upper'>>, 'VAR_MAP': <function parse_var_map>, 'VARIANCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VARIANCE_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VAR_SAMP': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.Variance'>>, 'VARIANCE_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.VariancePop'>>, 'VAR_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.VariancePop'>>, 'WEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Week'>>, 'WEEK_OF_YEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.WeekOfYear'>>, 'WEEKOFYEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.WeekOfYear'>>, 'WHEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.When'>>, 'X_M_L_TABLE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.XMLTable'>>, 'XOR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Xor'>>, 'YEAR': <function Hive.Parser.<lambda>>, 'GLOB': <function Parser.<lambda>>, 'LIKE': <function parse_like>, 'BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToBase64'>>, 'COLLECT_LIST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAgg'>>, 'COLLECT_SET': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SetAgg'>>, 'DATE_FORMAT': <function Hive.Parser.<lambda>>, 'FROM_UNIXTIME': <function format_time_lambda.<locals>._format_time>, 'GET_JSON_OBJECT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONExtractScalar'>>, 'LOCATE': <function locate_to_strposition>, 'PERCENTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Quantile'>>, 'PERCENTILE_APPROX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxQuantile'>>, 'SIZE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySize'>>, 'TO_DATE': <function format_time_lambda.<locals>._format_time>, 'TO_JSON': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONFormat'>>, 'UNBASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.FromBase64'>>, 'UNIX_TIMESTAMP': <function format_time_lambda.<locals>._format_time>}
NO_PAREN_FUNCTION_PARSERS =
{'ANY': <function Parser.<lambda>>, 'CASE': <function Parser.<lambda>>, 'IF': <function Parser.<lambda>>, 'NEXT': <function Parser.<lambda>>, 'TRANSFORM': <function Hive.Parser.<lambda>>}
PROPERTY_PARSERS =
{'ALGORITHM': <function Parser.<lambda>>, 'AUTO_INCREMENT': <function Parser.<lambda>>, 'BLOCKCOMPRESSION': <function Parser.<lambda>>, 'CHARACTER SET': <function Parser.<lambda>>, 'CHECKSUM': <function Parser.<lambda>>, 'CLUSTER BY': <function Parser.<lambda>>, 'CLUSTERED': <function Parser.<lambda>>, 'COLLATE': <function Parser.<lambda>>, 'COMMENT': <function Parser.<lambda>>, 'COPY': <function Parser.<lambda>>, 'DATABLOCKSIZE': <function Parser.<lambda>>, 'DEFINER': <function Parser.<lambda>>, 'DETERMINISTIC': <function Parser.<lambda>>, 'DISTKEY': <function Parser.<lambda>>, 'DISTSTYLE': <function Parser.<lambda>>, 'ENGINE': <function Parser.<lambda>>, 'EXECUTE': <function Parser.<lambda>>, 'EXTERNAL': <function Parser.<lambda>>, 'FALLBACK': <function Parser.<lambda>>, 'FORMAT': <function Parser.<lambda>>, 'FREESPACE': <function Parser.<lambda>>, 'HEAP': <function Parser.<lambda>>, 'IMMUTABLE': <function Parser.<lambda>>, 'JOURNAL': <function Parser.<lambda>>, 'LANGUAGE': <function Parser.<lambda>>, 'LAYOUT': <function Parser.<lambda>>, 'LIFETIME': <function Parser.<lambda>>, 'LIKE': <function Parser.<lambda>>, 'LOCATION': <function Parser.<lambda>>, 'LOCK': <function Parser.<lambda>>, 'LOCKING': <function Parser.<lambda>>, 'LOG': <function Parser.<lambda>>, 'MATERIALIZED': <function Parser.<lambda>>, 'MERGEBLOCKRATIO': <function Parser.<lambda>>, 'MULTISET': <function Parser.<lambda>>, 'NO': <function Parser.<lambda>>, 'ON': <function Parser.<lambda>>, 'ORDER BY': <function Parser.<lambda>>, 'PARTITION BY': <function Parser.<lambda>>, 'PARTITIONED BY': <function Parser.<lambda>>, 'PARTITIONED_BY': <function Parser.<lambda>>, 'PRIMARY KEY': <function Parser.<lambda>>, 'RANGE': <function Parser.<lambda>>, 'RETURNS': <function Parser.<lambda>>, 'ROW': <function Parser.<lambda>>, 'ROW_FORMAT': <function Parser.<lambda>>, 'SET': <function Parser.<lambda>>, 'SETTINGS': <function Parser.<lambda>>, 'SORTKEY': <function Parser.<lambda>>, 'SOURCE': <function Parser.<lambda>>, 'STABLE': <function Parser.<lambda>>, 'STORED': <function Parser.<lambda>>, 'TBLPROPERTIES': <function Parser.<lambda>>, 'TEMP': <function Parser.<lambda>>, 'TEMPORARY': <function Parser.<lambda>>, 'TO': <function Parser.<lambda>>, 'TRANSIENT': <function Parser.<lambda>>, 'TTL': <function Parser.<lambda>>, 'USING': <function Parser.<lambda>>, 'VOLATILE': <function Parser.<lambda>>, 'WITH': <function Parser.<lambda>>, 'WITH SERDEPROPERTIES': <function Hive.Parser.<lambda>>}
SET_TRIE: Dict =
{'GLOBAL': {0: True}, 'LOCAL': {0: True}, 'SESSION': {0: True}, 'TRANSACTION': {0: True}}
FORMAT_TRIE: Dict =
{'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
TIME_MAPPING: Dict[str, str] =
{'y': '%Y', 'Y': '%Y', 'YYYY': '%Y', 'yyyy': '%Y', 'YY': '%y', 'yy': '%y', 'MMMM': '%B', 'MMM': '%b', 'MM': '%m', 'M': '%-m', 'dd': '%d', 'd': '%-d', 'HH': '%H', 'H': '%-H', 'hh': '%I', 'h': '%-I', 'mm': '%M', 'm': '%-M', 'ss': '%S', 's': '%-S', 'SSSSSS': '%f', 'a': '%p', 'DD': '%j', 'D': '%-j', 'E': '%a', 'EE': '%a', 'EEE': '%a', 'EEEE': '%A'}
TIME_TRIE: Dict =
{'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
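As a rough illustration of what this mapping is for (exact output may differ across sqlglot versions; the table and column names are made up), Hive's Java-style format tokens are rewritten into strftime-style tokens when transpiling to another dialect:

import sqlglot

# 'yyyy' -> '%Y', 'MM' -> '%m', 'dd' -> '%d' via TIME_MAPPING / TIME_TRIE.
print(
    sqlglot.transpile(
        "SELECT DATE_FORMAT(ds, 'yyyy-MM-dd') FROM t",
        read="hive",
        write="duckdb",
    )[0]
)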
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_KEYWORDS
- DB_CREATABLES
- CREATABLES
- ID_VAR_TOKENS
- INTERVAL_VARS
- TABLE_ALIAS_TOKENS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- RANGE_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- FUNCTION_PARSERS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- MODIFIABLES
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- INSERT_ALTERNATIVES
- CLONE_KINDS
- TABLE_INDEX_HINT_TOKENS
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- CONCAT_NULL_OUTPUTS_STRING
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_BASE_FIRST
- ALTER_TABLE_ADD_COLUMN_KEYWORD
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- INDEX_OFFSET
- UNNEST_COLUMN_ONLY
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- NULL_ORDERING
- FORMAT_MAPPING
- error_level
- error_message_context
- max_errors
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
370 class Generator(generator.Generator): 371 LIMIT_FETCH = "LIMIT" 372 TABLESAMPLE_WITH_METHOD = False 373 TABLESAMPLE_SIZE_IS_PERCENT = True 374 JOIN_HINTS = False 375 TABLE_HINTS = False 376 QUERY_HINTS = False 377 INDEX_ON = "ON TABLE" 378 EXTRACT_ALLOWS_QUOTES = False 379 NVL2_SUPPORTED = False 380 381 TYPE_MAPPING = { 382 **generator.Generator.TYPE_MAPPING, 383 exp.DataType.Type.BIT: "BOOLEAN", 384 exp.DataType.Type.DATETIME: "TIMESTAMP", 385 exp.DataType.Type.TEXT: "STRING", 386 exp.DataType.Type.TIME: "TIMESTAMP", 387 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 388 exp.DataType.Type.VARBINARY: "BINARY", 389 } 390 391 TRANSFORMS = { 392 **generator.Generator.TRANSFORMS, 393 exp.Group: transforms.preprocess([transforms.unalias_group]), 394 exp.Select: transforms.preprocess( 395 [ 396 transforms.eliminate_qualify, 397 transforms.eliminate_distinct_on, 398 transforms.unnest_to_explode, 399 ] 400 ), 401 exp.Property: _property_sql, 402 exp.AnyValue: rename_func("FIRST"), 403 exp.ApproxDistinct: approx_count_distinct_sql, 404 exp.ArrayConcat: rename_func("CONCAT"), 405 exp.ArrayJoin: lambda self, e: self.func("CONCAT_WS", e.expression, e.this), 406 exp.ArraySize: rename_func("SIZE"), 407 exp.ArraySort: _array_sort_sql, 408 exp.With: no_recursive_cte_sql, 409 exp.DateAdd: _add_date_sql, 410 exp.DateDiff: _date_diff_sql, 411 exp.DateStrToDate: rename_func("TO_DATE"), 412 exp.DateSub: _add_date_sql, 413 exp.DateToDi: lambda self, e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Hive.DATEINT_FORMAT}) AS INT)", 414 exp.DiToDate: lambda self, e: f"TO_DATE(CAST({self.sql(e, 'this')} AS STRING), {Hive.DATEINT_FORMAT})", 415 exp.FileFormatProperty: lambda self, e: f"STORED AS {self.sql(e, 'this') if isinstance(e.this, exp.InputOutputFormat) else e.name.upper()}", 416 exp.FromBase64: rename_func("UNBASE64"), 417 exp.If: if_sql(), 418 exp.ILike: no_ilike_sql, 419 exp.IsNan: rename_func("ISNAN"), 420 exp.JSONExtract: rename_func("GET_JSON_OBJECT"), 421 exp.JSONExtractScalar: rename_func("GET_JSON_OBJECT"), 422 exp.JSONFormat: _json_format_sql, 423 exp.Left: left_to_substring_sql, 424 exp.Map: var_map_sql, 425 exp.Max: max_or_greatest, 426 exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)), 427 exp.Min: min_or_least, 428 exp.MonthsBetween: lambda self, e: self.func("MONTHS_BETWEEN", e.this, e.expression), 429 exp.NotNullColumnConstraint: lambda self, e: "" 430 if e.args.get("allow_null") 431 else "NOT NULL", 432 exp.VarMap: var_map_sql, 433 exp.Create: create_with_partitions_sql, 434 exp.Quantile: rename_func("PERCENTILE"), 435 exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"), 436 exp.RegexpExtract: regexp_extract_sql, 437 exp.RegexpReplace: regexp_replace_sql, 438 exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"), 439 exp.RegexpSplit: rename_func("SPLIT"), 440 exp.Right: right_to_substring_sql, 441 exp.SafeDivide: no_safe_divide_sql, 442 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 443 exp.SetAgg: rename_func("COLLECT_SET"), 444 exp.Split: lambda self, e: f"SPLIT({self.sql(e, 'this')}, CONCAT('\\\\Q', {self.sql(e, 'expression')}))", 445 exp.StrPosition: strposition_to_locate_sql, 446 exp.StrToDate: _str_to_date_sql, 447 exp.StrToTime: _str_to_time_sql, 448 exp.StrToUnix: _str_to_unix_sql, 449 exp.StructExtract: struct_extract_sql, 450 exp.TimeStrToDate: rename_func("TO_DATE"), 451 exp.TimeStrToTime: timestrtotime_sql, 452 exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"), 453 exp.TimeToStr: _time_to_str, 454 exp.TimeToUnix: 
rename_func("UNIX_TIMESTAMP"), 455 exp.ToBase64: rename_func("BASE64"), 456 exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS STRING), '-', ''), 1, 8) AS INT)", 457 exp.TsOrDsAdd: lambda self, e: f"DATE_ADD({self.sql(e, 'this')}, {self.sql(e, 'expression')})", 458 exp.TsOrDsToDate: _to_date_sql, 459 exp.TryCast: no_trycast_sql, 460 exp.UnixToStr: lambda self, e: self.func( 461 "FROM_UNIXTIME", e.this, time_format("hive")(self, e) 462 ), 463 exp.UnixToTime: rename_func("FROM_UNIXTIME"), 464 exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"), 465 exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'this')}", 466 exp.SerdeProperties: lambda self, e: self.properties(e, prefix="WITH SERDEPROPERTIES"), 467 exp.NumberToStr: rename_func("FORMAT_NUMBER"), 468 exp.LastDateOfMonth: rename_func("LAST_DAY"), 469 exp.National: lambda self, e: self.national_sql(e, prefix=""), 470 exp.ClusteredColumnConstraint: lambda self, e: f"({self.expressions(e, 'this', indent=False)})", 471 exp.NonClusteredColumnConstraint: lambda self, e: f"({self.expressions(e, 'this', indent=False)})", 472 exp.NotForReplicationColumnConstraint: lambda self, e: "", 473 exp.OnProperty: lambda self, e: "", 474 exp.PrimaryKeyColumnConstraint: lambda self, e: "PRIMARY KEY", 475 } 476 477 PROPERTIES_LOCATION = { 478 **generator.Generator.PROPERTIES_LOCATION, 479 exp.FileFormatProperty: exp.Properties.Location.POST_SCHEMA, 480 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 481 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 482 } 483 484 def parameter_sql(self, expression: exp.Parameter) -> str: 485 this = self.sql(expression, "this") 486 parent = expression.parent 487 488 if isinstance(parent, exp.EQ) and isinstance(parent.parent, exp.SetItem): 489 # We need to produce SET key = value instead of SET ${key} = value 490 return this 491 492 return f"${{{this}}}" 493 494 def schema_sql(self, expression: exp.Schema) -> str: 495 expression = expression.copy() 496 497 for ordered in expression.find_all(exp.Ordered): 498 if ordered.args.get("desc") is False: 499 ordered.set("desc", None) 500 501 return super().schema_sql(expression) 502 503 def constraint_sql(self, expression: exp.Constraint) -> str: 504 expression = expression.copy() 505 506 for prop in list(expression.find_all(exp.Properties)): 507 prop.pop() 508 509 this = self.sql(expression, "this") 510 expressions = self.expressions(expression, sep=" ", flat=True) 511 return f"CONSTRAINT {this} {expressions}" 512 513 def rowformatserdeproperty_sql(self, expression: exp.RowFormatSerdeProperty) -> str: 514 serde_props = self.sql(expression, "serde_properties") 515 serde_props = f" {serde_props}" if serde_props else "" 516 return f"ROW FORMAT SERDE {self.sql(expression, 'this')}{serde_props}" 517 518 def arrayagg_sql(self, expression: exp.ArrayAgg) -> str: 519 return self.func( 520 "COLLECT_LIST", 521 expression.this.this if isinstance(expression.this, exp.Order) else expression.this, 522 ) 523 524 def with_properties(self, properties: exp.Properties) -> str: 525 return self.properties(properties, prefix=self.seg("TBLPROPERTIES")) 526 527 def datatype_sql(self, expression: exp.DataType) -> str: 528 if ( 529 expression.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR) 530 and not expression.expressions 531 ): 532 expression = exp.DataType.build("text") 533 elif expression.this in exp.DataType.TEMPORAL_TYPES: 534 expression = exp.DataType.build(expression.this) 535 elif expression.is_type("float"): 
536 size_expression = expression.find(exp.DataTypeParam) 537 if size_expression: 538 size = int(size_expression.name) 539 expression = ( 540 exp.DataType.build("float") if size <= 32 else exp.DataType.build("double") 541 ) 542 543 return super().datatype_sql(expression) 544 545 def version_sql(self, expression: exp.Version) -> str: 546 sql = super().version_sql(expression) 547 return sql.replace("FOR ", "", 1)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether or not to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether or not to normalize identifiers to lowercase. Default: False.
- pad: Determines the pad size in a formatted string. Default: 2.
- indent: Determines the indentation size in a formatted string. Default: 2.
- normalize_functions: Whether or not to normalize all function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma should be leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether or not to preserve comments in the output SQL code. Default: True
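As a hedged sketch of how these options are usually supplied (options not shown keep their defaults), the top-level transpile call forwards them to Hive.Generator:

import sqlglot

# pretty and identify are generator options; write="hive" selects Hive.Generator.
print(
    sqlglot.transpile(
        "SELECT a FROM t WHERE a ILIKE '%x%'",
        write="hive",
        pretty=True,
        identify=True,
    )[0]
)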
TYPE_MAPPING =
{<Type.NCHAR: 'NCHAR'>: 'CHAR', <Type.NVARCHAR: 'NVARCHAR'>: 'VARCHAR', <Type.MEDIUMTEXT: 'MEDIUMTEXT'>: 'TEXT', <Type.LONGTEXT: 'LONGTEXT'>: 'TEXT', <Type.TINYTEXT: 'TINYTEXT'>: 'TEXT', <Type.MEDIUMBLOB: 'MEDIUMBLOB'>: 'BLOB', <Type.LONGBLOB: 'LONGBLOB'>: 'BLOB', <Type.TINYBLOB: 'TINYBLOB'>: 'BLOB', <Type.INET: 'INET'>: 'INET', <Type.BIT: 'BIT'>: 'BOOLEAN', <Type.DATETIME: 'DATETIME'>: 'TIMESTAMP', <Type.TEXT: 'TEXT'>: 'STRING', <Type.TIME: 'TIME'>: 'TIMESTAMP', <Type.TIMESTAMPTZ: 'TIMESTAMPTZ'>: 'TIMESTAMP', <Type.VARBINARY: 'VARBINARY'>: 'BINARY'}
TRANSFORMS =
{<class 'sqlglot.expressions.DateAdd'>: <function _add_date_sql>, <class 'sqlglot.expressions.TsOrDsAdd'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.CaseSpecificColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CharacterSetColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CharacterSetProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CheckColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ClusteredColumnConstraint'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.CollateColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CopyGrantsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CommentColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.DateFormatColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.DefaultColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.EncodeColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ExecuteAsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ExternalProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.HeapProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.InlineLengthColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.IntervalSpan'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.LanguageProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.LocationProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.LogProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.MaterializedProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.NoPrimaryIndexProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.NonClusteredColumnConstraint'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.NotForReplicationColumnConstraint'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.OnCommitProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.OnProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.OnUpdateColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.PathColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ReturnsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SetProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SettingsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SqlSecurityProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.StabilityProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TemporaryProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ToTableProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TransientProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TitleColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.UppercaseColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.VarMap'>: <function var_map_sql>, <class 'sqlglot.expressions.VolatileProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.WithJournalTableProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.Group'>: <function preprocess.<locals>._to_sql>, <class 
'sqlglot.expressions.Select'>: <function preprocess.<locals>._to_sql>, <class 'sqlglot.expressions.Property'>: <function _property_sql>, <class 'sqlglot.expressions.AnyValue'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ApproxDistinct'>: <function approx_count_distinct_sql>, <class 'sqlglot.expressions.ArrayConcat'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ArrayJoin'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.ArraySize'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ArraySort'>: <function _array_sort_sql>, <class 'sqlglot.expressions.With'>: <function no_recursive_cte_sql>, <class 'sqlglot.expressions.DateDiff'>: <function _date_diff_sql>, <class 'sqlglot.expressions.DateStrToDate'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.DateSub'>: <function _add_date_sql>, <class 'sqlglot.expressions.DateToDi'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.DiToDate'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.FileFormatProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.FromBase64'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.If'>: <function if_sql.<locals>._if_sql>, <class 'sqlglot.expressions.ILike'>: <function no_ilike_sql>, <class 'sqlglot.expressions.IsNan'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.JSONExtract'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.JSONExtractScalar'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.JSONFormat'>: <function _json_format_sql>, <class 'sqlglot.expressions.Left'>: <function left_to_substring_sql>, <class 'sqlglot.expressions.Map'>: <function var_map_sql>, <class 'sqlglot.expressions.Max'>: <function max_or_greatest>, <class 'sqlglot.expressions.MD5Digest'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.Min'>: <function min_or_least>, <class 'sqlglot.expressions.MonthsBetween'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.NotNullColumnConstraint'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.Create'>: <function create_with_partitions_sql>, <class 'sqlglot.expressions.Quantile'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ApproxQuantile'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.RegexpExtract'>: <function regexp_extract_sql>, <class 'sqlglot.expressions.RegexpReplace'>: <function regexp_replace_sql>, <class 'sqlglot.expressions.RegexpLike'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.RegexpSplit'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.Right'>: <function right_to_substring_sql>, <class 'sqlglot.expressions.SafeDivide'>: <function no_safe_divide_sql>, <class 'sqlglot.expressions.SchemaCommentProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.SetAgg'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.Split'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.StrPosition'>: <function strposition_to_locate_sql>, <class 'sqlglot.expressions.StrToDate'>: <function _str_to_date_sql>, <class 'sqlglot.expressions.StrToTime'>: <function _str_to_time_sql>, <class 'sqlglot.expressions.StrToUnix'>: <function _str_to_unix_sql>, <class 'sqlglot.expressions.StructExtract'>: <function struct_extract_sql>, <class 'sqlglot.expressions.TimeStrToDate'>: 
<function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TimeStrToTime'>: <function timestrtotime_sql>, <class 'sqlglot.expressions.TimeStrToUnix'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TimeToStr'>: <function _time_to_str>, <class 'sqlglot.expressions.TimeToUnix'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ToBase64'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TsOrDiToDi'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.TsOrDsToDate'>: <function _to_date_sql>, <class 'sqlglot.expressions.TryCast'>: <function no_trycast_sql>, <class 'sqlglot.expressions.UnixToStr'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.UnixToTime'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.UnixToTimeStr'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.PartitionedByProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.SerdeProperties'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.NumberToStr'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.LastDateOfMonth'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.National'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.PrimaryKeyColumnConstraint'>: <function Hive.Generator.<lambda>>}
PROPERTIES_LOCATION =
{<class 'sqlglot.expressions.AlgorithmProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.AutoIncrementProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.BlockCompressionProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.CharacterSetProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ChecksumProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.CollateProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.CopyGrantsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.Cluster'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ClusteredByProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DataBlocksizeProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.DefinerProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.DictRange'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DictProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DistKeyProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DistStyleProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.EngineProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ExecuteAsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ExternalProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.FallbackProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.FileFormatProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.FreespaceProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.HeapProperty'>: <Location.POST_WITH: 'POST_WITH'>, <class 'sqlglot.expressions.IsolatedLoadingProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.JournalProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.LanguageProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LikeProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LocationProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LockingProperty'>: <Location.POST_ALIAS: 'POST_ALIAS'>, <class 'sqlglot.expressions.LogProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.MaterializedProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.MergeBlockRatioProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.NoPrimaryIndexProperty'>: <Location.POST_EXPRESSION: 'POST_EXPRESSION'>, <class 'sqlglot.expressions.OnProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.OnCommitProperty'>: <Location.POST_EXPRESSION: 'POST_EXPRESSION'>, <class 'sqlglot.expressions.Order'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.PartitionedByProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.PrimaryKey'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.Property'>: <Location.POST_WITH: 'POST_WITH'>, <class 'sqlglot.expressions.ReturnsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.RowFormatProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.RowFormatDelimitedProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 
'sqlglot.expressions.RowFormatSerdeProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SchemaCommentProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SerdeProperties'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.Set'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SettingsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SetProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.SortKeyProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SqlSecurityProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.StabilityProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.TemporaryProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.ToTableProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.TransientProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.MergeTreeTTL'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.VolatileProperty'>: <Location.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.WithDataProperty'>: <Location.POST_EXPRESSION: 'POST_EXPRESSION'>, <class 'sqlglot.expressions.WithJournalTableProperty'>: <Location.POST_NAME: 'POST_NAME'>}
484 def parameter_sql(self, expression: exp.Parameter) -> str: 485 this = self.sql(expression, "this") 486 parent = expression.parent 487 488 if isinstance(parent, exp.EQ) and isinstance(parent.parent, exp.SetItem): 489 # We need to produce SET key = value instead of SET ${key} = value 490 return this 491 492 return f"${{{this}}}"
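A rough illustration of the ${...} rendering (the parameter below is a hypothetical, hand-built expression; hivevar:run_date is not defined anywhere in this module):

from sqlglot import exp

# A standalone parameter (i.e. not the key of a SET item) is wrapped in
# Hive's ${...} substitution syntax by parameter_sql.
param = exp.Parameter(this=exp.var("hivevar:run_date"))
print(param.sql(dialect="hive"))  # roughly: ${hivevar:run_date}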
503 def constraint_sql(self, expression: exp.Constraint) -> str: 504 expression = expression.copy() 505 506 for prop in list(expression.find_all(exp.Properties)): 507 prop.pop() 508 509 this = self.sql(expression, "this") 510 expressions = self.expressions(expression, sep=" ", flat=True) 511 return f"CONSTRAINT {this} {expressions}"
def rowformatserdeproperty_sql(self, expression: sqlglot.expressions.RowFormatSerdeProperty) -> str:
527 def datatype_sql(self, expression: exp.DataType) -> str: 528 if ( 529 expression.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR) 530 and not expression.expressions 531 ): 532 expression = exp.DataType.build("text") 533 elif expression.this in exp.DataType.TEMPORAL_TYPES: 534 expression = exp.DataType.build(expression.this) 535 elif expression.is_type("float"): 536 size_expression = expression.find(exp.DataTypeParam) 537 if size_expression: 538 size = int(size_expression.name) 539 expression = ( 540 exp.DataType.build("float") if size <= 32 else exp.DataType.build("double") 541 ) 542 543 return super().datatype_sql(expression)
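A brief, hedged illustration of these rewrites (exact output may vary between sqlglot versions):

import sqlglot

# An unsized VARCHAR is rebuilt as TEXT and then rendered as STRING for Hive.
print(sqlglot.transpile("CAST(x AS VARCHAR)", write="hive")[0])

# A FLOAT whose declared precision exceeds 32 is widened to DOUBLE.
print(sqlglot.transpile("CREATE TABLE t (c FLOAT(64))", write="hive")[0])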
INVERSE_TIME_MAPPING: Dict[str, str] =
{'%Y': 'yyyy', '%y': 'yy', '%B': 'MMMM', '%b': 'MMM', '%m': 'MM', '%-m': 'M', '%d': 'dd', '%-d': 'd', '%H': 'HH', '%-H': 'H', '%I': 'hh', '%-I': 'h', '%M': 'mm', '%-M': 'm', '%S': 'ss', '%-S': 's', '%f': 'SSSSSS', '%p': 'a', '%j': 'DD', '%-j': 'D', '%a': 'EEE', '%A': 'EEEE'}
INVERSE_TIME_TRIE: Dict =
{'%': {'Y': {0: True}, 'y': {0: True}, 'B': {0: True}, 'b': {0: True}, 'm': {0: True}, '-': {'m': {0: True}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'j': {0: True}}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'f': {0: True}, 'p': {0: True}, 'j': {0: True}, 'a': {0: True}, 'A': {0: True}}}
@classmethod
def can_identify(text: str, identify: str | bool = 'safe') -> bool:
269 @classmethod 270 def can_identify(cls, text: str, identify: str | bool = "safe") -> bool: 271 """Checks if text can be identified given an identify option. 272 273 Args: 274 text: The text to check. 275 identify: 276 "always" or `True`: Always returns true. 277 "safe": True if the identifier is case-insensitive. 278 279 Returns: 280 Whether or not the given text can be identified. 281 """ 282 if identify is True or identify == "always": 283 return True 284 285 if identify == "safe": 286 return not cls.case_sensitive(text) 287 288 return False
Checks if text can be identified given an identify option.
Arguments:
- text: The text to check.
- identify: "always" or
True
: Always returns true. "safe": True if the identifier is case-insensitive.
Returns:
Whether or not the given text can be identified.
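A small usage sketch (only the "always" case is asserted, since the "safe" result depends on the dialect's case-sensitivity rules):

from sqlglot.dialects.hive import Hive

# "always" (or True) permits quoting regardless of the text.
print(Hive.Generator.can_identify("Some_Column", "always"))  # True

# "safe" allows quoting only when it cannot change how the identifier resolves.
print(Hive.Generator.can_identify("Some_Column", "safe"))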
TOKENIZER_CLASS =
<class 'Hive.Tokenizer'>
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- LOCKING_READS_SUPPORTED
- EXPLICIT_UNION
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- COLUMN_JOIN_MARKS_SUPPORTED
- TZ_TO_WITH_TIME_ZONE
- VALUES_AS_TABLE
- ALTER_TABLE_ADD_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- SUPPORTS_PARAMETERS
- STAR_MAPPING
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- UNESCAPED_SEQUENCE_TABLE
- SENTINEL_LINE_BREAK
- INDEX_OFFSET
- UNNEST_COLUMN_ONLY
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- NULL_ORDERING
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- normalize_functions
- unsupported_messages
- generate
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- clone_sql
- describe_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- except_op
- fetch_sql
- filter_sql
- hint_sql
- index_sql
- identifier_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- insert_sql
- intersect_sql
- intersect_op
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognize_sql
- query_modifiers
- offset_limit_modifiers
- after_having_modifiers
- after_limit_modifiers
- select_sql
- schema_columns_sql
- star_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- union_sql
- union_op
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_sql
- safebracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- safeconcat_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- formatjson_sql
- jsonobject_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- aliases_sql
- attimezone_sql
- add_sql
- and_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- renametable_sql
- altertable_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- intdiv_sql
- dpipe_sql
- safedpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- or_sql
- slice_sql
- sub_sql
- trycast_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- set_operation
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql