
sqlglot.dialects.hive

  1from __future__ import annotations
  2
  3import typing as t
  4
  5from sqlglot import exp, generator, parser, tokens, transforms
  6from sqlglot.dialects.dialect import (
  7    DATE_ADD_OR_SUB,
  8    Dialect,
  9    NormalizationStrategy,
 10    approx_count_distinct_sql,
 11    arg_max_or_min_no_count,
 12    datestrtodate_sql,
 13    format_time_lambda,
 14    if_sql,
 15    is_parse_json,
 16    left_to_substring_sql,
 17    locate_to_strposition,
 18    max_or_greatest,
 19    min_or_least,
 20    no_ilike_sql,
 21    no_recursive_cte_sql,
 22    no_safe_divide_sql,
 23    no_trycast_sql,
 24    regexp_extract_sql,
 25    regexp_replace_sql,
 26    rename_func,
 27    right_to_substring_sql,
 28    strposition_to_locate_sql,
 29    struct_extract_sql,
 30    time_format,
 31    timestrtotime_sql,
 32    var_map_sql,
 33)
 34from sqlglot.transforms import (
 35    remove_unique_constraints,
 36    ctas_with_tmp_tables_to_create_tmp_view,
 37    preprocess,
 38    move_schema_columns_to_partitioned_by,
 39)
 40from sqlglot.helper import seq_get
 41from sqlglot.parser import parse_var_map
 42from sqlglot.tokens import TokenType
 43
 44# (FuncType, Multiplier)
 45DATE_DELTA_INTERVAL = {
 46    "YEAR": ("ADD_MONTHS", 12),
 47    "MONTH": ("ADD_MONTHS", 1),
 48    "QUARTER": ("ADD_MONTHS", 3),
 49    "WEEK": ("DATE_ADD", 7),
 50    "DAY": ("DATE_ADD", 1),
 51}
 52
 53TIME_DIFF_FACTOR = {
 54    "MILLISECOND": " * 1000",
 55    "SECOND": "",
 56    "MINUTE": " / 60",
 57    "HOUR": " / 3600",
 58}
 59
 60DIFF_MONTH_SWITCH = ("YEAR", "QUARTER", "MONTH")
 61
 62
 63def _add_date_sql(self: Hive.Generator, expression: DATE_ADD_OR_SUB) -> str:
 64    if isinstance(expression, exp.TsOrDsAdd) and not expression.unit:
 65        return self.func("DATE_ADD", expression.this, expression.expression)
 66
 67    unit = expression.text("unit").upper()
 68    func, multiplier = DATE_DELTA_INTERVAL.get(unit, ("DATE_ADD", 1))
 69
 70    if isinstance(expression, exp.DateSub):
 71        multiplier *= -1
 72
 73    if expression.expression.is_number:
 74        modified_increment = exp.Literal.number(int(expression.text("expression")) * multiplier)
 75    else:
 76        modified_increment = expression.expression
 77        if multiplier != 1:
 78            modified_increment = exp.Mul(  # type: ignore
 79                this=modified_increment, expression=exp.Literal.number(multiplier)
 80            )
 81
 82    return self.func(func, expression.this, modified_increment)
 83
 84
 85def _date_diff_sql(self: Hive.Generator, expression: exp.DateDiff | exp.TsOrDsDiff) -> str:
 86    unit = expression.text("unit").upper()
 87
 88    factor = TIME_DIFF_FACTOR.get(unit)
 89    if factor is not None:
 90        left = self.sql(expression, "this")
 91        right = self.sql(expression, "expression")
 92        sec_diff = f"UNIX_TIMESTAMP({left}) - UNIX_TIMESTAMP({right})"
 93        return f"({sec_diff}){factor}" if factor else sec_diff
 94
 95    months_between = unit in DIFF_MONTH_SWITCH
 96    sql_func = "MONTHS_BETWEEN" if months_between else "DATEDIFF"
 97    _, multiplier = DATE_DELTA_INTERVAL.get(unit, ("", 1))
 98    multiplier_sql = f" / {multiplier}" if multiplier > 1 else ""
 99    diff_sql = f"{sql_func}({self.format_args(expression.this, expression.expression)})"
100
101    if months_between or multiplier_sql:
102        # MONTHS_BETWEEN returns a float, so we need to truncate the fractional part.
103        # For the same reason, we want to truncate if there's a divisor present.
104        diff_sql = f"CAST({diff_sql}{multiplier_sql} AS INT)"
105
106    return diff_sql
107
108
109def _json_format_sql(self: Hive.Generator, expression: exp.JSONFormat) -> str:
110    this = expression.this
111
112    if is_parse_json(this):
113        if this.this.is_string:
114            # Since FROM_JSON requires a nested type, we always wrap the json string with
115            # an array to ensure that "naked" strings like "'a'" will be handled correctly
116            wrapped_json = exp.Literal.string(f"[{this.this.name}]")
117
118            from_json = self.func(
119                "FROM_JSON", wrapped_json, self.func("SCHEMA_OF_JSON", wrapped_json)
120            )
121            to_json = self.func("TO_JSON", from_json)
122
123            # This strips the [, ] delimiters of the dummy array printed by TO_JSON
124            return self.func("REGEXP_EXTRACT", to_json, "'^.(.*).$'", "1")
125        return self.sql(this)
126
127    return self.func("TO_JSON", this, expression.args.get("options"))
128
129
130def _array_sort_sql(self: Hive.Generator, expression: exp.ArraySort) -> str:
131    if expression.expression:
132        self.unsupported("Hive SORT_ARRAY does not support a comparator")
133    return f"SORT_ARRAY({self.sql(expression, 'this')})"
134
135
136def _property_sql(self: Hive.Generator, expression: exp.Property) -> str:
137    return f"{self.property_name(expression, string_key=True)}={self.sql(expression, 'value')}"
138
139
140def _str_to_unix_sql(self: Hive.Generator, expression: exp.StrToUnix) -> str:
141    return self.func("UNIX_TIMESTAMP", expression.this, time_format("hive")(self, expression))
142
143
144def _str_to_date_sql(self: Hive.Generator, expression: exp.StrToDate) -> str:
145    this = self.sql(expression, "this")
146    time_format = self.format_time(expression)
147    if time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
148        this = f"FROM_UNIXTIME(UNIX_TIMESTAMP({this}, {time_format}))"
149    return f"CAST({this} AS DATE)"
150
151
152def _str_to_time_sql(self: Hive.Generator, expression: exp.StrToTime) -> str:
153    this = self.sql(expression, "this")
154    time_format = self.format_time(expression)
155    if time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
156        this = f"FROM_UNIXTIME(UNIX_TIMESTAMP({this}, {time_format}))"
157    return f"CAST({this} AS TIMESTAMP)"
158
159
160def _time_to_str(self: Hive.Generator, expression: exp.TimeToStr) -> str:
161    this = self.sql(expression, "this")
162    time_format = self.format_time(expression)
163    return f"DATE_FORMAT({this}, {time_format})"
164
165
166def _to_date_sql(self: Hive.Generator, expression: exp.TsOrDsToDate) -> str:
167    this = self.sql(expression, "this")
168    time_format = self.format_time(expression)
169    if time_format and time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
170        return f"TO_DATE({this}, {time_format})"
171    if isinstance(expression.this, exp.TsOrDsToDate):
172        return this
173    return f"TO_DATE({this})"
174
175
176def _parse_ignore_nulls(
177    exp_class: t.Type[exp.Expression],
178) -> t.Callable[[t.List[exp.Expression]], exp.Expression]:
179    def _parse(args: t.List[exp.Expression]) -> exp.Expression:
180        this = exp_class(this=seq_get(args, 0))
181        if seq_get(args, 1) == exp.true():
182            return exp.IgnoreNulls(this=this)
183        return this
184
185    return _parse
186
187
188class Hive(Dialect):
189    ALIAS_POST_TABLESAMPLE = True
190    IDENTIFIERS_CAN_START_WITH_DIGIT = True
191    SUPPORTS_USER_DEFINED_TYPES = False
192    SAFE_DIVISION = True
193
194    # https://spark.apache.org/docs/latest/sql-ref-identifier.html#description
195    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE
196
197    TIME_MAPPING = {
198        "y": "%Y",
199        "Y": "%Y",
200        "YYYY": "%Y",
201        "yyyy": "%Y",
202        "YY": "%y",
203        "yy": "%y",
204        "MMMM": "%B",
205        "MMM": "%b",
206        "MM": "%m",
207        "M": "%-m",
208        "dd": "%d",
209        "d": "%-d",
210        "HH": "%H",
211        "H": "%-H",
212        "hh": "%I",
213        "h": "%-I",
214        "mm": "%M",
215        "m": "%-M",
216        "ss": "%S",
217        "s": "%-S",
218        "SSSSSS": "%f",
219        "a": "%p",
220        "DD": "%j",
221        "D": "%-j",
222        "E": "%a",
223        "EE": "%a",
224        "EEE": "%a",
225        "EEEE": "%A",
226    }
227
228    DATE_FORMAT = "'yyyy-MM-dd'"
229    DATEINT_FORMAT = "'yyyyMMdd'"
230    TIME_FORMAT = "'yyyy-MM-dd HH:mm:ss'"
231
232    class Tokenizer(tokens.Tokenizer):
233        QUOTES = ["'", '"']
234        IDENTIFIERS = ["`"]
235        STRING_ESCAPES = ["\\"]
236
237        SINGLE_TOKENS = {
238            **tokens.Tokenizer.SINGLE_TOKENS,
239            "$": TokenType.PARAMETER,
240        }
241
242        KEYWORDS = {
243            **tokens.Tokenizer.KEYWORDS,
244            "ADD ARCHIVE": TokenType.COMMAND,
245            "ADD ARCHIVES": TokenType.COMMAND,
246            "ADD FILE": TokenType.COMMAND,
247            "ADD FILES": TokenType.COMMAND,
248            "ADD JAR": TokenType.COMMAND,
249            "ADD JARS": TokenType.COMMAND,
250            "MSCK REPAIR": TokenType.COMMAND,
251            "REFRESH": TokenType.REFRESH,
252            "TIMESTAMP AS OF": TokenType.TIMESTAMP_SNAPSHOT,
253            "VERSION AS OF": TokenType.VERSION_SNAPSHOT,
254            "WITH SERDEPROPERTIES": TokenType.SERDE_PROPERTIES,
255        }
256
257        NUMERIC_LITERALS = {
258            "L": "BIGINT",
259            "S": "SMALLINT",
260            "Y": "TINYINT",
261            "D": "DOUBLE",
262            "F": "FLOAT",
263            "BD": "DECIMAL",
264        }
265
266    class Parser(parser.Parser):
267        LOG_DEFAULTS_TO_LN = True
268        STRICT_CAST = False
269
270        FUNCTIONS = {
271            **parser.Parser.FUNCTIONS,
272            "BASE64": exp.ToBase64.from_arg_list,
273            "COLLECT_LIST": exp.ArrayAgg.from_arg_list,
274            "COLLECT_SET": exp.ArrayUniqueAgg.from_arg_list,
275            "DATE_ADD": lambda args: exp.TsOrDsAdd(
276                this=seq_get(args, 0), expression=seq_get(args, 1), unit=exp.Literal.string("DAY")
277            ),
278            "DATE_FORMAT": lambda args: format_time_lambda(exp.TimeToStr, "hive")(
279                [
280                    exp.TimeStrToTime(this=seq_get(args, 0)),
281                    seq_get(args, 1),
282                ]
283            ),
284            "DATE_SUB": lambda args: exp.TsOrDsAdd(
285                this=seq_get(args, 0),
286                expression=exp.Mul(this=seq_get(args, 1), expression=exp.Literal.number(-1)),
287                unit=exp.Literal.string("DAY"),
288            ),
289            "DATEDIFF": lambda args: exp.DateDiff(
290                this=exp.TsOrDsToDate(this=seq_get(args, 0)),
291                expression=exp.TsOrDsToDate(this=seq_get(args, 1)),
292            ),
293            "DAY": lambda args: exp.Day(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
294            "FIRST": _parse_ignore_nulls(exp.First),
295            "FIRST_VALUE": _parse_ignore_nulls(exp.FirstValue),
296            "FROM_UNIXTIME": format_time_lambda(exp.UnixToStr, "hive", True),
297            "GET_JSON_OBJECT": exp.JSONExtractScalar.from_arg_list,
298            "LAST": _parse_ignore_nulls(exp.Last),
299            "LAST_VALUE": _parse_ignore_nulls(exp.LastValue),
300            "LOCATE": locate_to_strposition,
301            "MAP": parse_var_map,
302            "MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate.from_arg_list(args)),
303            "PERCENTILE": exp.Quantile.from_arg_list,
304            "PERCENTILE_APPROX": exp.ApproxQuantile.from_arg_list,
305            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
306                this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2)
307            ),
308            "SIZE": exp.ArraySize.from_arg_list,
309            "SPLIT": exp.RegexpSplit.from_arg_list,
310            "STR_TO_MAP": lambda args: exp.StrToMap(
311                this=seq_get(args, 0),
312                pair_delim=seq_get(args, 1) or exp.Literal.string(","),
313                key_value_delim=seq_get(args, 2) or exp.Literal.string(":"),
314            ),
315            "TO_DATE": format_time_lambda(exp.TsOrDsToDate, "hive"),
316            "TO_JSON": exp.JSONFormat.from_arg_list,
317            "UNBASE64": exp.FromBase64.from_arg_list,
318            "UNIX_TIMESTAMP": format_time_lambda(exp.StrToUnix, "hive", True),
319            "YEAR": lambda args: exp.Year(this=exp.TsOrDsToDate.from_arg_list(args)),
320        }
321
322        NO_PAREN_FUNCTION_PARSERS = {
323            **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
324            "TRANSFORM": lambda self: self._parse_transform(),
325        }
326
327        PROPERTY_PARSERS = {
328            **parser.Parser.PROPERTY_PARSERS,
329            "WITH SERDEPROPERTIES": lambda self: exp.SerdeProperties(
330                expressions=self._parse_wrapped_csv(self._parse_property)
331            ),
332        }
333
334        def _parse_transform(self) -> t.Optional[exp.Transform | exp.QueryTransform]:
335            if not self._match(TokenType.L_PAREN, advance=False):
336                self._retreat(self._index - 1)
337                return None
338
339            args = self._parse_wrapped_csv(self._parse_lambda)
340            row_format_before = self._parse_row_format(match_row=True)
341
342            record_writer = None
343            if self._match_text_seq("RECORDWRITER"):
344                record_writer = self._parse_string()
345
346            if not self._match(TokenType.USING):
347                return exp.Transform.from_arg_list(args)
348
349            command_script = self._parse_string()
350
351            self._match(TokenType.ALIAS)
352            schema = self._parse_schema()
353
354            row_format_after = self._parse_row_format(match_row=True)
355            record_reader = None
356            if self._match_text_seq("RECORDREADER"):
357                record_reader = self._parse_string()
358
359            return self.expression(
360                exp.QueryTransform,
361                expressions=args,
362                command_script=command_script,
363                schema=schema,
364                row_format_before=row_format_before,
365                record_writer=record_writer,
366                row_format_after=row_format_after,
367                record_reader=record_reader,
368            )
369
370        def _parse_types(
371            self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
372        ) -> t.Optional[exp.Expression]:
373            """
374            Spark (and most likely Hive) treats casts to CHAR(length) and VARCHAR(length) as casts to
375            STRING in all contexts except for schema definitions. For example, this is in Spark v3.4.0:
376
377                spark-sql (default)> select cast(1234 as varchar(2));
378                23/06/06 15:51:18 WARN CharVarcharUtils: The Spark cast operator does not support
379                char/varchar type and simply treats them as string type. Please use string type
380                directly to avoid confusion. Otherwise, you can set spark.sql.legacy.charVarcharAsString
381                to true, so that Spark treat them as string type as same as Spark 3.0 and earlier
382
383                1234
384                Time taken: 4.265 seconds, Fetched 1 row(s)
385
386            This shows that Spark doesn't truncate the value into '12', which is inconsistent with
387            what other dialects (e.g. postgres) do, so we need to drop the length to transpile correctly.
388
389            Reference: https://spark.apache.org/docs/latest/sql-ref-datatypes.html
390            """
391            this = super()._parse_types(
392                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
393            )
394
395            if this and not schema:
396                return this.transform(
397                    lambda node: (
398                        node.replace(exp.DataType.build("text"))
399                        if isinstance(node, exp.DataType) and node.is_type("char", "varchar")
400                        else node
401                    ),
402                    copy=False,
403                )
404
405            return this
406
407        def _parse_partition_and_order(
408            self,
409        ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
410            return (
411                (
412                    self._parse_csv(self._parse_conjunction)
413                    if self._match_set({TokenType.PARTITION_BY, TokenType.DISTRIBUTE_BY})
414                    else []
415                ),
416                super()._parse_order(skip_order_token=self._match(TokenType.SORT_BY)),
417            )
418
419    class Generator(generator.Generator):
420        LIMIT_FETCH = "LIMIT"
421        TABLESAMPLE_WITH_METHOD = False
422        JOIN_HINTS = False
423        TABLE_HINTS = False
424        QUERY_HINTS = False
425        INDEX_ON = "ON TABLE"
426        EXTRACT_ALLOWS_QUOTES = False
427        NVL2_SUPPORTED = False
428        LAST_DAY_SUPPORTS_DATE_PART = False
429        JSON_PATH_SINGLE_QUOTE_ESCAPE = True
430
431        EXPRESSIONS_WITHOUT_NESTED_CTES = {
432            exp.Insert,
433            exp.Select,
434            exp.Subquery,
435            exp.Union,
436        }
437
438        SUPPORTED_JSON_PATH_PARTS = {
439            exp.JSONPathKey,
440            exp.JSONPathRoot,
441            exp.JSONPathSubscript,
442            exp.JSONPathWildcard,
443        }
444
445        TYPE_MAPPING = {
446            **generator.Generator.TYPE_MAPPING,
447            exp.DataType.Type.BIT: "BOOLEAN",
448            exp.DataType.Type.DATETIME: "TIMESTAMP",
449            exp.DataType.Type.TEXT: "STRING",
450            exp.DataType.Type.TIME: "TIMESTAMP",
451            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
452            exp.DataType.Type.VARBINARY: "BINARY",
453        }
454
455        TRANSFORMS = {
456            **generator.Generator.TRANSFORMS,
457            exp.Group: transforms.preprocess([transforms.unalias_group]),
458            exp.Select: transforms.preprocess(
459                [
460                    transforms.eliminate_qualify,
461                    transforms.eliminate_distinct_on,
462                    transforms.unnest_to_explode,
463                ]
464            ),
465            exp.Property: _property_sql,
466            exp.AnyValue: rename_func("FIRST"),
467            exp.ApproxDistinct: approx_count_distinct_sql,
468            exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
469            exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
470            exp.ArrayConcat: rename_func("CONCAT"),
471            exp.ArrayJoin: lambda self, e: self.func("CONCAT_WS", e.expression, e.this),
472            exp.ArraySize: rename_func("SIZE"),
473            exp.ArraySort: _array_sort_sql,
474            exp.With: no_recursive_cte_sql,
475            exp.DateAdd: _add_date_sql,
476            exp.DateDiff: _date_diff_sql,
477            exp.DateStrToDate: datestrtodate_sql,
478            exp.DateSub: _add_date_sql,
479            exp.DateToDi: lambda self,
480            e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Hive.DATEINT_FORMAT}) AS INT)",
481            exp.DiToDate: lambda self,
482            e: f"TO_DATE(CAST({self.sql(e, 'this')} AS STRING), {Hive.DATEINT_FORMAT})",
483            exp.FileFormatProperty: lambda self,
484            e: f"STORED AS {self.sql(e, 'this') if isinstance(e.this, exp.InputOutputFormat) else e.name.upper()}",
485            exp.FromBase64: rename_func("UNBASE64"),
486            exp.If: if_sql(),
487            exp.ILike: no_ilike_sql,
488            exp.IsNan: rename_func("ISNAN"),
489            exp.JSONExtract: rename_func("GET_JSON_OBJECT"),
490            exp.JSONExtractScalar: rename_func("GET_JSON_OBJECT"),
491            exp.JSONFormat: _json_format_sql,
492            exp.Left: left_to_substring_sql,
493            exp.Map: var_map_sql,
494            exp.Max: max_or_greatest,
495            exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
496            exp.Min: min_or_least,
497            exp.MonthsBetween: lambda self, e: self.func("MONTHS_BETWEEN", e.this, e.expression),
498            exp.NotNullColumnConstraint: lambda self, e: (
499                "" if e.args.get("allow_null") else "NOT NULL"
500            ),
501            exp.VarMap: var_map_sql,
502            exp.Create: preprocess(
503                [
504                    remove_unique_constraints,
505                    ctas_with_tmp_tables_to_create_tmp_view,
506                    move_schema_columns_to_partitioned_by,
507                ]
508            ),
509            exp.Quantile: rename_func("PERCENTILE"),
510            exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"),
511            exp.RegexpExtract: regexp_extract_sql,
512            exp.RegexpReplace: regexp_replace_sql,
513            exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"),
514            exp.RegexpSplit: rename_func("SPLIT"),
515            exp.Right: right_to_substring_sql,
516            exp.SafeDivide: no_safe_divide_sql,
517            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
518            exp.ArrayUniqueAgg: rename_func("COLLECT_SET"),
519            exp.Split: lambda self,
520            e: f"SPLIT({self.sql(e, 'this')}, CONCAT('\\\\Q', {self.sql(e, 'expression')}))",
521            exp.StrPosition: strposition_to_locate_sql,
522            exp.StrToDate: _str_to_date_sql,
523            exp.StrToTime: _str_to_time_sql,
524            exp.StrToUnix: _str_to_unix_sql,
525            exp.StructExtract: struct_extract_sql,
526            exp.TimeStrToDate: rename_func("TO_DATE"),
527            exp.TimeStrToTime: timestrtotime_sql,
528            exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"),
529            exp.TimeToStr: _time_to_str,
530            exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"),
531            exp.ToBase64: rename_func("BASE64"),
532            exp.TsOrDiToDi: lambda self,
533            e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS STRING), '-', ''), 1, 8) AS INT)",
534            exp.TsOrDsAdd: _add_date_sql,
535            exp.TsOrDsDiff: _date_diff_sql,
536            exp.TsOrDsToDate: _to_date_sql,
537            exp.TryCast: no_trycast_sql,
538            exp.UnixToStr: lambda self, e: self.func(
539                "FROM_UNIXTIME", e.this, time_format("hive")(self, e)
540            ),
541            exp.UnixToTime: rename_func("FROM_UNIXTIME"),
542            exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"),
543            exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'this')}",
544            exp.SerdeProperties: lambda self, e: self.properties(e, prefix="WITH SERDEPROPERTIES"),
545            exp.NumberToStr: rename_func("FORMAT_NUMBER"),
546            exp.National: lambda self, e: self.national_sql(e, prefix=""),
547            exp.ClusteredColumnConstraint: lambda self,
548            e: f"({self.expressions(e, 'this', indent=False)})",
549            exp.NonClusteredColumnConstraint: lambda self,
550            e: f"({self.expressions(e, 'this', indent=False)})",
551            exp.NotForReplicationColumnConstraint: lambda self, e: "",
552            exp.OnProperty: lambda self, e: "",
553            exp.PrimaryKeyColumnConstraint: lambda self, e: "PRIMARY KEY",
554        }
555
556        PROPERTIES_LOCATION = {
557            **generator.Generator.PROPERTIES_LOCATION,
558            exp.FileFormatProperty: exp.Properties.Location.POST_SCHEMA,
559            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
560            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
561            exp.WithDataProperty: exp.Properties.Location.UNSUPPORTED,
562        }
563
564        def _jsonpathkey_sql(self, expression: exp.JSONPathKey) -> str:
565            if isinstance(expression.this, exp.JSONPathWildcard):
566                self.unsupported("Unsupported wildcard in JSONPathKey expression")
567                return ""
568
569            return super()._jsonpathkey_sql(expression)
570
571        def parameter_sql(self, expression: exp.Parameter) -> str:
572            this = self.sql(expression, "this")
573            expression_sql = self.sql(expression, "expression")
574
575            parent = expression.parent
576            this = f"{this}:{expression_sql}" if expression_sql else this
577
578            if isinstance(parent, exp.EQ) and isinstance(parent.parent, exp.SetItem):
579                # We need to produce SET key = value instead of SET ${key} = value
580                return this
581
582            return f"${{{this}}}"
583
584        def schema_sql(self, expression: exp.Schema) -> str:
585            for ordered in expression.find_all(exp.Ordered):
586                if ordered.args.get("desc") is False:
587                    ordered.set("desc", None)
588
589            return super().schema_sql(expression)
590
591        def constraint_sql(self, expression: exp.Constraint) -> str:
592            for prop in list(expression.find_all(exp.Properties)):
593                prop.pop()
594
595            this = self.sql(expression, "this")
596            expressions = self.expressions(expression, sep=" ", flat=True)
597            return f"CONSTRAINT {this} {expressions}"
598
599        def rowformatserdeproperty_sql(self, expression: exp.RowFormatSerdeProperty) -> str:
600            serde_props = self.sql(expression, "serde_properties")
601            serde_props = f" {serde_props}" if serde_props else ""
602            return f"ROW FORMAT SERDE {self.sql(expression, 'this')}{serde_props}"
603
604        def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
605            return self.func(
606                "COLLECT_LIST",
607                expression.this.this if isinstance(expression.this, exp.Order) else expression.this,
608            )
609
610        def with_properties(self, properties: exp.Properties) -> str:
611            return self.properties(properties, prefix=self.seg("TBLPROPERTIES"))
612
613        def datatype_sql(self, expression: exp.DataType) -> str:
614            if (
615                expression.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR)
616                and not expression.expressions
617            ):
618                expression = exp.DataType.build("text")
619            elif expression.is_type(exp.DataType.Type.TEXT) and expression.expressions:
620                expression.set("this", exp.DataType.Type.VARCHAR)
621            elif expression.this in exp.DataType.TEMPORAL_TYPES:
622                expression = exp.DataType.build(expression.this)
623            elif expression.is_type("float"):
624                size_expression = expression.find(exp.DataTypeParam)
625                if size_expression:
626                    size = int(size_expression.name)
627                    expression = (
628                        exp.DataType.build("float") if size <= 32 else exp.DataType.build("double")
629                    )
630
631            return super().datatype_sql(expression)
632
633        def version_sql(self, expression: exp.Version) -> str:
634            sql = super().version_sql(expression)
635            return sql.replace("FOR ", "", 1)
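
The docstring of Hive.Parser._parse_types above explains that casts to CHAR(length) and
VARCHAR(length) are widened to STRING everywhere except schema definitions. A minimal sketch of
that behaviour through sqlglot's public transpile API (assuming a recent sqlglot; the exact output
strings may vary by version):

    import sqlglot

    # Outside a schema the length is dropped and the type becomes STRING.
    print(sqlglot.transpile("SELECT CAST(1234 AS VARCHAR(2))", read="hive", write="hive")[0])
    # e.g. SELECT CAST(1234 AS STRING)

    # Inside a column definition the parameterized type is kept as-is.
    print(sqlglot.transpile("CREATE TABLE t (c VARCHAR(2))", read="hive", write="hive")[0])
    # e.g. CREATE TABLE t (c VARCHAR(2))
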
DATE_DELTA_INTERVAL = {'YEAR': ('ADD_MONTHS', 12), 'MONTH': ('ADD_MONTHS', 1), 'QUARTER': ('ADD_MONTHS', 3), 'WEEK': ('DATE_ADD', 7), 'DAY': ('DATE_ADD', 1)}
TIME_DIFF_FACTOR = {'MILLISECOND': ' * 1000', 'SECOND': '', 'MINUTE': ' / 60', 'HOUR': ' / 3600'}
DIFF_MONTH_SWITCH = ('YEAR', 'QUARTER', 'MONTH')
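
DATE_DELTA_INTERVAL and TIME_DIFF_FACTOR drive _add_date_sql and _date_diff_sql above: units that
Hive's DATE_ADD and DATEDIFF cannot express directly are routed through ADD_MONTHS or folded into
a day or second multiplier. A short sketch of the generated SQL (illustrative; exact output may
vary by sqlglot version):

    from sqlglot import exp

    # WEEK is not a native Hive DATE_ADD unit, so 2 weeks is folded into 2 * 7 days.
    week_add = exp.DateAdd(this=exp.column("col"), expression=exp.Literal.number(2), unit=exp.var("WEEK"))
    print(week_add.sql(dialect="hive"))  # e.g. DATE_ADD(col, 14)

    # QUARTER is routed through ADD_MONTHS with a multiplier of 3.
    quarter_add = exp.DateAdd(this=exp.column("col"), expression=exp.Literal.number(3), unit=exp.var("QUARTER"))
    print(quarter_add.sql(dialect="hive"))  # e.g. ADD_MONTHS(col, 9)
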
class Hive(sqlglot.dialects.dialect.Dialect):
ALIAS_POST_TABLESAMPLE = True

Determines whether or not the table alias comes after the TABLESAMPLE clause.
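
A hedged illustration (assuming sqlglot parses the BUCKET sampling syntax shown; the printed string
is indicative only): with this flag set, the alias is expected to appear after the TABLESAMPLE
clause in the generated Hive SQL rather than before it.

    import sqlglot

    sql = "SELECT * FROM t TABLESAMPLE (BUCKET 1 OUT OF 2) AS s"
    print(sqlglot.transpile(sql, read="hive", write="hive")[0])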

IDENTIFIERS_CAN_START_WITH_DIGIT = True

Determines whether or not an unquoted identifier can start with a digit.
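
For example (a sketch; the quoting shown is the expected behaviour, not a guaranteed output
string), a digit-leading name is accepted unquoted when reading Hive SQL, and dialects that do not
allow it are expected to quote it on output:

    import sqlglot

    print(sqlglot.transpile("SELECT a FROM 2021_data", read="hive", write="presto")[0])
    # e.g. SELECT a FROM "2021_data"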

SUPPORTS_USER_DEFINED_TYPES = False

Determines whether or not user-defined data types are supported.

SAFE_DIVISION = True

Determines whether division by zero throws an error (False) or returns NULL (True).
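
A minimal check of what this flag records on the parsed tree (assuming current sqlglot marks
division nodes with a "safe" argument):

    import sqlglot
    from sqlglot import exp

    div = sqlglot.parse_one("SELECT a / b FROM t", read="hive").find(exp.Div)
    print(div.args.get("safe"))  # expected: True, i.e. a / 0 yields NULL in the source dialect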

NORMALIZATION_STRATEGY = <NormalizationStrategy.CASE_INSENSITIVE: 'CASE_INSENSITIVE'>

Specifies the strategy according to which identifiers should be normalized.
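
Because the strategy is case-insensitive, sqlglot's identifier normalization lowercases unquoted
names. A short sketch using the optimizer helper (assuming it is available at this import path):

    import sqlglot
    from sqlglot.optimizer.normalize_identifiers import normalize_identifiers

    expr = sqlglot.parse_one("SELECT MyCol FROM MyTable", read="hive")
    print(normalize_identifiers(expr, dialect="hive").sql(dialect="hive"))
    # e.g. SELECT mycol FROM mytable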

TIME_MAPPING: Dict[str, str] = {'y': '%Y', 'Y': '%Y', 'YYYY': '%Y', 'yyyy': '%Y', 'YY': '%y', 'yy': '%y', 'MMMM': '%B', 'MMM': '%b', 'MM': '%m', 'M': '%-m', 'dd': '%d', 'd': '%-d', 'HH': '%H', 'H': '%-H', 'hh': '%I', 'h': '%-I', 'mm': '%M', 'm': '%-M', 'ss': '%S', 's': '%-S', 'SSSSSS': '%f', 'a': '%p', 'DD': '%j', 'D': '%-j', 'E': '%a', 'EE': '%a', 'EEE': '%a', 'EEEE': '%A'}

Associates this dialect's time formats with their equivalent Python strftime format.
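
For instance, Hive's Java-style format tokens are rewritten through this mapping when transpiling
to a dialect that uses strftime-style formats (the output shown is indicative):

    import sqlglot

    print(sqlglot.transpile("SELECT DATE_FORMAT(ds, 'yyyy-MM-dd')", read="hive", write="duckdb")[0])
    # e.g. SELECT STRFTIME(CAST(ds AS TIMESTAMP), '%Y-%m-%d')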

DATE_FORMAT = "'yyyy-MM-dd'"
DATEINT_FORMAT = "'yyyyMMdd'"
TIME_FORMAT = "'yyyy-MM-dd HH:mm:ss'"
tokenizer_class = <class 'Hive.Tokenizer'>
parser_class = <class 'Hive.Parser'>
generator_class = <class 'Hive.Generator'>
TIME_TRIE: Dict = {'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
FORMAT_TRIE: Dict = {'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
INVERSE_TIME_MAPPING: Dict[str, str] = {'%Y': 'yyyy', '%y': 'yy', '%B': 'MMMM', '%b': 'MMM', '%m': 'MM', '%-m': 'M', '%d': 'dd', '%-d': 'd', '%H': 'HH', '%-H': 'H', '%I': 'hh', '%-I': 'h', '%M': 'mm', '%-M': 'm', '%S': 'ss', '%-S': 's', '%f': 'SSSSSS', '%p': 'a', '%j': 'DD', '%-j': 'D', '%a': 'EEE', '%A': 'EEEE'}
INVERSE_TIME_TRIE: Dict = {'%': {'Y': {0: True}, 'y': {0: True}, 'B': {0: True}, 'b': {0: True}, 'm': {0: True}, '-': {'m': {0: True}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'j': {0: True}}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'f': {0: True}, 'p': {0: True}, 'j': {0: True}, 'a': {0: True}, 'A': {0: True}}}
INVERSE_ESCAPE_SEQUENCES: Dict[str, str] = {}
QUOTE_START = "'"
QUOTE_END = "'"
IDENTIFIER_START = '`'
IDENTIFIER_END = '`'
BIT_START: Optional[str] = None
BIT_END: Optional[str] = None
HEX_START: Optional[str] = None
HEX_END: Optional[str] = None
BYTE_START: Optional[str] = None
BYTE_END: Optional[str] = None
UNICODE_START: Optional[str] = None
UNICODE_END: Optional[str] = None
class Hive.Tokenizer(sqlglot.tokens.Tokenizer):
QUOTES = ["'", '"']
IDENTIFIERS = ['`']
STRING_ESCAPES = ['\\']
SINGLE_TOKENS = {'(': <TokenType.L_PAREN: 'L_PAREN'>, ')': <TokenType.R_PAREN: 'R_PAREN'>, '[': <TokenType.L_BRACKET: 'L_BRACKET'>, ']': <TokenType.R_BRACKET: 'R_BRACKET'>, '{': <TokenType.L_BRACE: 'L_BRACE'>, '}': <TokenType.R_BRACE: 'R_BRACE'>, '&': <TokenType.AMP: 'AMP'>, '^': <TokenType.CARET: 'CARET'>, ':': <TokenType.COLON: 'COLON'>, ',': <TokenType.COMMA: 'COMMA'>, '.': <TokenType.DOT: 'DOT'>, '-': <TokenType.DASH: 'DASH'>, '=': <TokenType.EQ: 'EQ'>, '>': <TokenType.GT: 'GT'>, '<': <TokenType.LT: 'LT'>, '%': <TokenType.MOD: 'MOD'>, '!': <TokenType.NOT: 'NOT'>, '|': <TokenType.PIPE: 'PIPE'>, '+': <TokenType.PLUS: 'PLUS'>, ';': <TokenType.SEMICOLON: 'SEMICOLON'>, '/': <TokenType.SLASH: 'SLASH'>, '\\': <TokenType.BACKSLASH: 'BACKSLASH'>, '*': <TokenType.STAR: 'STAR'>, '~': <TokenType.TILDA: 'TILDA'>, '?': <TokenType.PLACEHOLDER: 'PLACEHOLDER'>, '@': <TokenType.PARAMETER: 'PARAMETER'>, "'": <TokenType.QUOTE: 'QUOTE'>, '`': <TokenType.IDENTIFIER: 'IDENTIFIER'>, '"': <TokenType.IDENTIFIER: 'IDENTIFIER'>, '#': <TokenType.HASH: 'HASH'>, '$': <TokenType.PARAMETER: 'PARAMETER'>}
KEYWORDS = {'{%': <TokenType.BLOCK_START: 'BLOCK_START'>, '{%+': <TokenType.BLOCK_START: 'BLOCK_START'>, '{%-': <TokenType.BLOCK_START: 'BLOCK_START'>, '%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '+%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '-%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '{{+': <TokenType.BLOCK_START: 'BLOCK_START'>, '{{-': <TokenType.BLOCK_START: 'BLOCK_START'>, '+}}': <TokenType.BLOCK_END: 'BLOCK_END'>, '-}}': <TokenType.BLOCK_END: 'BLOCK_END'>, '/*+': <TokenType.HINT: 'HINT'>, '==': <TokenType.EQ: 'EQ'>, '::': <TokenType.DCOLON: 'DCOLON'>, '||': <TokenType.DPIPE: 'DPIPE'>, '>=': <TokenType.GTE: 'GTE'>, '<=': <TokenType.LTE: 'LTE'>, '<>': <TokenType.NEQ: 'NEQ'>, '!=': <TokenType.NEQ: 'NEQ'>, ':=': <TokenType.COLON_EQ: 'COLON_EQ'>, '<=>': <TokenType.NULLSAFE_EQ: 'NULLSAFE_EQ'>, '->': <TokenType.ARROW: 'ARROW'>, '->>': <TokenType.DARROW: 'DARROW'>, '=>': <TokenType.FARROW: 'FARROW'>, '#>': <TokenType.HASH_ARROW: 'HASH_ARROW'>, '#>>': <TokenType.DHASH_ARROW: 'DHASH_ARROW'>, '<->': <TokenType.LR_ARROW: 'LR_ARROW'>, '&&': <TokenType.DAMP: 'DAMP'>, '??': <TokenType.DQMARK: 'DQMARK'>, 'ALL': <TokenType.ALL: 'ALL'>, 'ALWAYS': <TokenType.ALWAYS: 'ALWAYS'>, 'AND': <TokenType.AND: 'AND'>, 'ANTI': <TokenType.ANTI: 'ANTI'>, 'ANY': <TokenType.ANY: 'ANY'>, 'ASC': <TokenType.ASC: 'ASC'>, 'AS': <TokenType.ALIAS: 'ALIAS'>, 'ASOF': <TokenType.ASOF: 'ASOF'>, 'AUTOINCREMENT': <TokenType.AUTO_INCREMENT: 'AUTO_INCREMENT'>, 'AUTO_INCREMENT': <TokenType.AUTO_INCREMENT: 'AUTO_INCREMENT'>, 'BEGIN': <TokenType.BEGIN: 'BEGIN'>, 'BETWEEN': <TokenType.BETWEEN: 'BETWEEN'>, 'CACHE': <TokenType.CACHE: 'CACHE'>, 'UNCACHE': <TokenType.UNCACHE: 'UNCACHE'>, 'CASE': <TokenType.CASE: 'CASE'>, 'CHARACTER SET': <TokenType.CHARACTER_SET: 'CHARACTER_SET'>, 'CLUSTER BY': <TokenType.CLUSTER_BY: 'CLUSTER_BY'>, 'COLLATE': <TokenType.COLLATE: 'COLLATE'>, 'COLUMN': <TokenType.COLUMN: 'COLUMN'>, 'COMMIT': <TokenType.COMMIT: 'COMMIT'>, 'CONNECT BY': <TokenType.CONNECT_BY: 'CONNECT_BY'>, 'CONSTRAINT': <TokenType.CONSTRAINT: 'CONSTRAINT'>, 'CREATE': <TokenType.CREATE: 'CREATE'>, 'CROSS': <TokenType.CROSS: 'CROSS'>, 'CUBE': <TokenType.CUBE: 'CUBE'>, 'CURRENT_DATE': <TokenType.CURRENT_DATE: 'CURRENT_DATE'>, 'CURRENT_TIME': <TokenType.CURRENT_TIME: 'CURRENT_TIME'>, 'CURRENT_TIMESTAMP': <TokenType.CURRENT_TIMESTAMP: 'CURRENT_TIMESTAMP'>, 'CURRENT_USER': <TokenType.CURRENT_USER: 'CURRENT_USER'>, 'DATABASE': <TokenType.DATABASE: 'DATABASE'>, 'DEFAULT': <TokenType.DEFAULT: 'DEFAULT'>, 'DELETE': <TokenType.DELETE: 'DELETE'>, 'DESC': <TokenType.DESC: 'DESC'>, 'DESCRIBE': <TokenType.DESCRIBE: 'DESCRIBE'>, 'DISTINCT': <TokenType.DISTINCT: 'DISTINCT'>, 'DISTRIBUTE BY': <TokenType.DISTRIBUTE_BY: 'DISTRIBUTE_BY'>, 'DIV': <TokenType.DIV: 'DIV'>, 'DROP': <TokenType.DROP: 'DROP'>, 'ELSE': <TokenType.ELSE: 'ELSE'>, 'END': <TokenType.END: 'END'>, 'ESCAPE': <TokenType.ESCAPE: 'ESCAPE'>, 'EXCEPT': <TokenType.EXCEPT: 'EXCEPT'>, 'EXECUTE': <TokenType.EXECUTE: 'EXECUTE'>, 'EXISTS': <TokenType.EXISTS: 'EXISTS'>, 'FALSE': <TokenType.FALSE: 'FALSE'>, 'FETCH': <TokenType.FETCH: 'FETCH'>, 'FILTER': <TokenType.FILTER: 'FILTER'>, 'FIRST': <TokenType.FIRST: 'FIRST'>, 'FULL': <TokenType.FULL: 'FULL'>, 'FUNCTION': <TokenType.FUNCTION: 'FUNCTION'>, 'FOR': <TokenType.FOR: 'FOR'>, 'FOREIGN KEY': <TokenType.FOREIGN_KEY: 'FOREIGN_KEY'>, 'FORMAT': <TokenType.FORMAT: 'FORMAT'>, 'FROM': <TokenType.FROM: 'FROM'>, 'GEOGRAPHY': <TokenType.GEOGRAPHY: 'GEOGRAPHY'>, 'GEOMETRY': <TokenType.GEOMETRY: 'GEOMETRY'>, 'GLOB': <TokenType.GLOB: 'GLOB'>, 'GROUP BY': <TokenType.GROUP_BY: 
'GROUP_BY'>, 'GROUPING SETS': <TokenType.GROUPING_SETS: 'GROUPING_SETS'>, 'HAVING': <TokenType.HAVING: 'HAVING'>, 'ILIKE': <TokenType.ILIKE: 'ILIKE'>, 'IN': <TokenType.IN: 'IN'>, 'INDEX': <TokenType.INDEX: 'INDEX'>, 'INET': <TokenType.INET: 'INET'>, 'INNER': <TokenType.INNER: 'INNER'>, 'INSERT': <TokenType.INSERT: 'INSERT'>, 'INTERVAL': <TokenType.INTERVAL: 'INTERVAL'>, 'INTERSECT': <TokenType.INTERSECT: 'INTERSECT'>, 'INTO': <TokenType.INTO: 'INTO'>, 'IS': <TokenType.IS: 'IS'>, 'ISNULL': <TokenType.ISNULL: 'ISNULL'>, 'JOIN': <TokenType.JOIN: 'JOIN'>, 'KEEP': <TokenType.KEEP: 'KEEP'>, 'KILL': <TokenType.KILL: 'KILL'>, 'LATERAL': <TokenType.LATERAL: 'LATERAL'>, 'LEFT': <TokenType.LEFT: 'LEFT'>, 'LIKE': <TokenType.LIKE: 'LIKE'>, 'LIMIT': <TokenType.LIMIT: 'LIMIT'>, 'LOAD': <TokenType.LOAD: 'LOAD'>, 'LOCK': <TokenType.LOCK: 'LOCK'>, 'MERGE': <TokenType.MERGE: 'MERGE'>, 'NATURAL': <TokenType.NATURAL: 'NATURAL'>, 'NEXT': <TokenType.NEXT: 'NEXT'>, 'NOT': <TokenType.NOT: 'NOT'>, 'NOTNULL': <TokenType.NOTNULL: 'NOTNULL'>, 'NULL': <TokenType.NULL: 'NULL'>, 'OBJECT': <TokenType.OBJECT: 'OBJECT'>, 'OFFSET': <TokenType.OFFSET: 'OFFSET'>, 'ON': <TokenType.ON: 'ON'>, 'OR': <TokenType.OR: 'OR'>, 'XOR': <TokenType.XOR: 'XOR'>, 'ORDER BY': <TokenType.ORDER_BY: 'ORDER_BY'>, 'ORDINALITY': <TokenType.ORDINALITY: 'ORDINALITY'>, 'OUTER': <TokenType.OUTER: 'OUTER'>, 'OVER': <TokenType.OVER: 'OVER'>, 'OVERLAPS': <TokenType.OVERLAPS: 'OVERLAPS'>, 'OVERWRITE': <TokenType.OVERWRITE: 'OVERWRITE'>, 'PARTITION': <TokenType.PARTITION: 'PARTITION'>, 'PARTITION BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PARTITIONED BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PARTITIONED_BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PERCENT': <TokenType.PERCENT: 'PERCENT'>, 'PIVOT': <TokenType.PIVOT: 'PIVOT'>, 'PRAGMA': <TokenType.PRAGMA: 'PRAGMA'>, 'PRIMARY KEY': <TokenType.PRIMARY_KEY: 'PRIMARY_KEY'>, 'PROCEDURE': <TokenType.PROCEDURE: 'PROCEDURE'>, 'QUALIFY': <TokenType.QUALIFY: 'QUALIFY'>, 'RANGE': <TokenType.RANGE: 'RANGE'>, 'RECURSIVE': <TokenType.RECURSIVE: 'RECURSIVE'>, 'REGEXP': <TokenType.RLIKE: 'RLIKE'>, 'REPLACE': <TokenType.REPLACE: 'REPLACE'>, 'RETURNING': <TokenType.RETURNING: 'RETURNING'>, 'REFERENCES': <TokenType.REFERENCES: 'REFERENCES'>, 'RIGHT': <TokenType.RIGHT: 'RIGHT'>, 'RLIKE': <TokenType.RLIKE: 'RLIKE'>, 'ROLLBACK': <TokenType.ROLLBACK: 'ROLLBACK'>, 'ROLLUP': <TokenType.ROLLUP: 'ROLLUP'>, 'ROW': <TokenType.ROW: 'ROW'>, 'ROWS': <TokenType.ROWS: 'ROWS'>, 'SCHEMA': <TokenType.SCHEMA: 'SCHEMA'>, 'SELECT': <TokenType.SELECT: 'SELECT'>, 'SEMI': <TokenType.SEMI: 'SEMI'>, 'SET': <TokenType.SET: 'SET'>, 'SETTINGS': <TokenType.SETTINGS: 'SETTINGS'>, 'SHOW': <TokenType.SHOW: 'SHOW'>, 'SIMILAR TO': <TokenType.SIMILAR_TO: 'SIMILAR_TO'>, 'SOME': <TokenType.SOME: 'SOME'>, 'SORT BY': <TokenType.SORT_BY: 'SORT_BY'>, 'START WITH': <TokenType.START_WITH: 'START_WITH'>, 'TABLE': <TokenType.TABLE: 'TABLE'>, 'TABLESAMPLE': <TokenType.TABLE_SAMPLE: 'TABLE_SAMPLE'>, 'TEMP': <TokenType.TEMPORARY: 'TEMPORARY'>, 'TEMPORARY': <TokenType.TEMPORARY: 'TEMPORARY'>, 'THEN': <TokenType.THEN: 'THEN'>, 'TRUE': <TokenType.TRUE: 'TRUE'>, 'UNION': <TokenType.UNION: 'UNION'>, 'UNKNOWN': <TokenType.UNKNOWN: 'UNKNOWN'>, 'UNNEST': <TokenType.UNNEST: 'UNNEST'>, 'UNPIVOT': <TokenType.UNPIVOT: 'UNPIVOT'>, 'UPDATE': <TokenType.UPDATE: 'UPDATE'>, 'USE': <TokenType.USE: 'USE'>, 'USING': <TokenType.USING: 'USING'>, 'UUID': <TokenType.UUID: 'UUID'>, 'VALUES': <TokenType.VALUES: 'VALUES'>, 'VIEW': <TokenType.VIEW: 'VIEW'>, 'VOLATILE': <TokenType.VOLATILE: 
'VOLATILE'>, 'WHEN': <TokenType.WHEN: 'WHEN'>, 'WHERE': <TokenType.WHERE: 'WHERE'>, 'WINDOW': <TokenType.WINDOW: 'WINDOW'>, 'WITH': <TokenType.WITH: 'WITH'>, 'APPLY': <TokenType.APPLY: 'APPLY'>, 'ARRAY': <TokenType.ARRAY: 'ARRAY'>, 'BIT': <TokenType.BIT: 'BIT'>, 'BOOL': <TokenType.BOOLEAN: 'BOOLEAN'>, 'BOOLEAN': <TokenType.BOOLEAN: 'BOOLEAN'>, 'BYTE': <TokenType.TINYINT: 'TINYINT'>, 'MEDIUMINT': <TokenType.MEDIUMINT: 'MEDIUMINT'>, 'INT1': <TokenType.TINYINT: 'TINYINT'>, 'TINYINT': <TokenType.TINYINT: 'TINYINT'>, 'INT16': <TokenType.SMALLINT: 'SMALLINT'>, 'SHORT': <TokenType.SMALLINT: 'SMALLINT'>, 'SMALLINT': <TokenType.SMALLINT: 'SMALLINT'>, 'INT128': <TokenType.INT128: 'INT128'>, 'HUGEINT': <TokenType.INT128: 'INT128'>, 'INT2': <TokenType.SMALLINT: 'SMALLINT'>, 'INTEGER': <TokenType.INT: 'INT'>, 'INT': <TokenType.INT: 'INT'>, 'INT4': <TokenType.INT: 'INT'>, 'INT32': <TokenType.INT: 'INT'>, 'INT64': <TokenType.BIGINT: 'BIGINT'>, 'LONG': <TokenType.BIGINT: 'BIGINT'>, 'BIGINT': <TokenType.BIGINT: 'BIGINT'>, 'INT8': <TokenType.TINYINT: 'TINYINT'>, 'DEC': <TokenType.DECIMAL: 'DECIMAL'>, 'DECIMAL': <TokenType.DECIMAL: 'DECIMAL'>, 'BIGDECIMAL': <TokenType.BIGDECIMAL: 'BIGDECIMAL'>, 'BIGNUMERIC': <TokenType.BIGDECIMAL: 'BIGDECIMAL'>, 'MAP': <TokenType.MAP: 'MAP'>, 'NULLABLE': <TokenType.NULLABLE: 'NULLABLE'>, 'NUMBER': <TokenType.DECIMAL: 'DECIMAL'>, 'NUMERIC': <TokenType.DECIMAL: 'DECIMAL'>, 'FIXED': <TokenType.DECIMAL: 'DECIMAL'>, 'REAL': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT4': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT8': <TokenType.DOUBLE: 'DOUBLE'>, 'DOUBLE': <TokenType.DOUBLE: 'DOUBLE'>, 'DOUBLE PRECISION': <TokenType.DOUBLE: 'DOUBLE'>, 'JSON': <TokenType.JSON: 'JSON'>, 'CHAR': <TokenType.CHAR: 'CHAR'>, 'CHARACTER': <TokenType.CHAR: 'CHAR'>, 'NCHAR': <TokenType.NCHAR: 'NCHAR'>, 'VARCHAR': <TokenType.VARCHAR: 'VARCHAR'>, 'VARCHAR2': <TokenType.VARCHAR: 'VARCHAR'>, 'NVARCHAR': <TokenType.NVARCHAR: 'NVARCHAR'>, 'NVARCHAR2': <TokenType.NVARCHAR: 'NVARCHAR'>, 'BPCHAR': <TokenType.BPCHAR: 'BPCHAR'>, 'STR': <TokenType.TEXT: 'TEXT'>, 'STRING': <TokenType.TEXT: 'TEXT'>, 'TEXT': <TokenType.TEXT: 'TEXT'>, 'LONGTEXT': <TokenType.LONGTEXT: 'LONGTEXT'>, 'MEDIUMTEXT': <TokenType.MEDIUMTEXT: 'MEDIUMTEXT'>, 'TINYTEXT': <TokenType.TINYTEXT: 'TINYTEXT'>, 'CLOB': <TokenType.TEXT: 'TEXT'>, 'LONGVARCHAR': <TokenType.TEXT: 'TEXT'>, 'BINARY': <TokenType.BINARY: 'BINARY'>, 'BLOB': <TokenType.VARBINARY: 'VARBINARY'>, 'LONGBLOB': <TokenType.LONGBLOB: 'LONGBLOB'>, 'MEDIUMBLOB': <TokenType.MEDIUMBLOB: 'MEDIUMBLOB'>, 'TINYBLOB': <TokenType.TINYBLOB: 'TINYBLOB'>, 'BYTEA': <TokenType.VARBINARY: 'VARBINARY'>, 'VARBINARY': <TokenType.VARBINARY: 'VARBINARY'>, 'TIME': <TokenType.TIME: 'TIME'>, 'TIMETZ': <TokenType.TIMETZ: 'TIMETZ'>, 'TIMESTAMP': <TokenType.TIMESTAMP: 'TIMESTAMP'>, 'TIMESTAMPTZ': <TokenType.TIMESTAMPTZ: 'TIMESTAMPTZ'>, 'TIMESTAMPLTZ': <TokenType.TIMESTAMPLTZ: 'TIMESTAMPLTZ'>, 'DATE': <TokenType.DATE: 'DATE'>, 'DATETIME': <TokenType.DATETIME: 'DATETIME'>, 'INT4RANGE': <TokenType.INT4RANGE: 'INT4RANGE'>, 'INT4MULTIRANGE': <TokenType.INT4MULTIRANGE: 'INT4MULTIRANGE'>, 'INT8RANGE': <TokenType.INT8RANGE: 'INT8RANGE'>, 'INT8MULTIRANGE': <TokenType.INT8MULTIRANGE: 'INT8MULTIRANGE'>, 'NUMRANGE': <TokenType.NUMRANGE: 'NUMRANGE'>, 'NUMMULTIRANGE': <TokenType.NUMMULTIRANGE: 'NUMMULTIRANGE'>, 'TSRANGE': <TokenType.TSRANGE: 'TSRANGE'>, 'TSMULTIRANGE': <TokenType.TSMULTIRANGE: 'TSMULTIRANGE'>, 'TSTZRANGE': <TokenType.TSTZRANGE: 'TSTZRANGE'>, 'TSTZMULTIRANGE': <TokenType.TSTZMULTIRANGE: 
'TSTZMULTIRANGE'>, 'DATERANGE': <TokenType.DATERANGE: 'DATERANGE'>, 'DATEMULTIRANGE': <TokenType.DATEMULTIRANGE: 'DATEMULTIRANGE'>, 'UNIQUE': <TokenType.UNIQUE: 'UNIQUE'>, 'STRUCT': <TokenType.STRUCT: 'STRUCT'>, 'VARIANT': <TokenType.VARIANT: 'VARIANT'>, 'ALTER': <TokenType.ALTER: 'ALTER'>, 'ANALYZE': <TokenType.COMMAND: 'COMMAND'>, 'CALL': <TokenType.COMMAND: 'COMMAND'>, 'COMMENT': <TokenType.COMMENT: 'COMMENT'>, 'COPY': <TokenType.COMMAND: 'COMMAND'>, 'EXPLAIN': <TokenType.COMMAND: 'COMMAND'>, 'GRANT': <TokenType.COMMAND: 'COMMAND'>, 'OPTIMIZE': <TokenType.COMMAND: 'COMMAND'>, 'PREPARE': <TokenType.COMMAND: 'COMMAND'>, 'TRUNCATE': <TokenType.COMMAND: 'COMMAND'>, 'VACUUM': <TokenType.COMMAND: 'COMMAND'>, 'USER-DEFINED': <TokenType.USERDEFINED: 'USERDEFINED'>, 'FOR VERSION': <TokenType.VERSION_SNAPSHOT: 'VERSION_SNAPSHOT'>, 'FOR TIMESTAMP': <TokenType.TIMESTAMP_SNAPSHOT: 'TIMESTAMP_SNAPSHOT'>, 'ADD ARCHIVE': <TokenType.COMMAND: 'COMMAND'>, 'ADD ARCHIVES': <TokenType.COMMAND: 'COMMAND'>, 'ADD FILE': <TokenType.COMMAND: 'COMMAND'>, 'ADD FILES': <TokenType.COMMAND: 'COMMAND'>, 'ADD JAR': <TokenType.COMMAND: 'COMMAND'>, 'ADD JARS': <TokenType.COMMAND: 'COMMAND'>, 'MSCK REPAIR': <TokenType.COMMAND: 'COMMAND'>, 'REFRESH': <TokenType.REFRESH: 'REFRESH'>, 'TIMESTAMP AS OF': <TokenType.TIMESTAMP_SNAPSHOT: 'TIMESTAMP_SNAPSHOT'>, 'VERSION AS OF': <TokenType.VERSION_SNAPSHOT: 'VERSION_SNAPSHOT'>, 'WITH SERDEPROPERTIES': <TokenType.SERDE_PROPERTIES: 'SERDE_PROPERTIES'>}
NUMERIC_LITERALS = {'L': 'BIGINT', 'S': 'SMALLINT', 'Y': 'TINYINT', 'D': 'DOUBLE', 'F': 'FLOAT', 'BD': 'DECIMAL'}
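NUMERIC_LITERALS lets the tokenizer recognize Hive's suffixed numeric literals (10L, 5S, 1.5BD and so on) and attach the mapped type to the value. The snippet below is only an illustrative sketch of how this surfaces through the public API; the exact rendered text can differ between sqlglot versions.

    import sqlglot

    # The L suffix maps to BIGINT and BD to DECIMAL via NUMERIC_LITERALS, so the
    # parsed tree carries the type and the generated SQL is expected to spell it
    # out as a cast, e.g. CAST(10 AS BIGINT).
    print(sqlglot.transpile("SELECT 10L, 2.5BD", read="hive")[0])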
class Hive.Parser(sqlglot.parser.Parser):
267    class Parser(parser.Parser):
268        LOG_DEFAULTS_TO_LN = True
269        STRICT_CAST = False
270
271        FUNCTIONS = {
272            **parser.Parser.FUNCTIONS,
273            "BASE64": exp.ToBase64.from_arg_list,
274            "COLLECT_LIST": exp.ArrayAgg.from_arg_list,
275            "COLLECT_SET": exp.ArrayUniqueAgg.from_arg_list,
276            "DATE_ADD": lambda args: exp.TsOrDsAdd(
277                this=seq_get(args, 0), expression=seq_get(args, 1), unit=exp.Literal.string("DAY")
278            ),
279            "DATE_FORMAT": lambda args: format_time_lambda(exp.TimeToStr, "hive")(
280                [
281                    exp.TimeStrToTime(this=seq_get(args, 0)),
282                    seq_get(args, 1),
283                ]
284            ),
285            "DATE_SUB": lambda args: exp.TsOrDsAdd(
286                this=seq_get(args, 0),
287                expression=exp.Mul(this=seq_get(args, 1), expression=exp.Literal.number(-1)),
288                unit=exp.Literal.string("DAY"),
289            ),
290            "DATEDIFF": lambda args: exp.DateDiff(
291                this=exp.TsOrDsToDate(this=seq_get(args, 0)),
292                expression=exp.TsOrDsToDate(this=seq_get(args, 1)),
293            ),
294            "DAY": lambda args: exp.Day(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
295            "FIRST": _parse_ignore_nulls(exp.First),
296            "FIRST_VALUE": _parse_ignore_nulls(exp.FirstValue),
297            "FROM_UNIXTIME": format_time_lambda(exp.UnixToStr, "hive", True),
298            "GET_JSON_OBJECT": exp.JSONExtractScalar.from_arg_list,
299            "LAST": _parse_ignore_nulls(exp.Last),
300            "LAST_VALUE": _parse_ignore_nulls(exp.LastValue),
301            "LOCATE": locate_to_strposition,
302            "MAP": parse_var_map,
303            "MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate.from_arg_list(args)),
304            "PERCENTILE": exp.Quantile.from_arg_list,
305            "PERCENTILE_APPROX": exp.ApproxQuantile.from_arg_list,
306            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
307                this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2)
308            ),
309            "SIZE": exp.ArraySize.from_arg_list,
310            "SPLIT": exp.RegexpSplit.from_arg_list,
311            "STR_TO_MAP": lambda args: exp.StrToMap(
312                this=seq_get(args, 0),
313                pair_delim=seq_get(args, 1) or exp.Literal.string(","),
314                key_value_delim=seq_get(args, 2) or exp.Literal.string(":"),
315            ),
316            "TO_DATE": format_time_lambda(exp.TsOrDsToDate, "hive"),
317            "TO_JSON": exp.JSONFormat.from_arg_list,
318            "UNBASE64": exp.FromBase64.from_arg_list,
319            "UNIX_TIMESTAMP": format_time_lambda(exp.StrToUnix, "hive", True),
320            "YEAR": lambda args: exp.Year(this=exp.TsOrDsToDate.from_arg_list(args)),
321        }
322
323        NO_PAREN_FUNCTION_PARSERS = {
324            **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
325            "TRANSFORM": lambda self: self._parse_transform(),
326        }
327
328        PROPERTY_PARSERS = {
329            **parser.Parser.PROPERTY_PARSERS,
330            "WITH SERDEPROPERTIES": lambda self: exp.SerdeProperties(
331                expressions=self._parse_wrapped_csv(self._parse_property)
332            ),
333        }
334
335        def _parse_transform(self) -> t.Optional[exp.Transform | exp.QueryTransform]:
336            if not self._match(TokenType.L_PAREN, advance=False):
337                self._retreat(self._index - 1)
338                return None
339
340            args = self._parse_wrapped_csv(self._parse_lambda)
341            row_format_before = self._parse_row_format(match_row=True)
342
343            record_writer = None
344            if self._match_text_seq("RECORDWRITER"):
345                record_writer = self._parse_string()
346
347            if not self._match(TokenType.USING):
348                return exp.Transform.from_arg_list(args)
349
350            command_script = self._parse_string()
351
352            self._match(TokenType.ALIAS)
353            schema = self._parse_schema()
354
355            row_format_after = self._parse_row_format(match_row=True)
356            record_reader = None
357            if self._match_text_seq("RECORDREADER"):
358                record_reader = self._parse_string()
359
360            return self.expression(
361                exp.QueryTransform,
362                expressions=args,
363                command_script=command_script,
364                schema=schema,
365                row_format_before=row_format_before,
366                record_writer=record_writer,
367                row_format_after=row_format_after,
368                record_reader=record_reader,
369            )
370
371        def _parse_types(
372            self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
373        ) -> t.Optional[exp.Expression]:
374            """
375            Spark (and most likely Hive) treats casts to CHAR(length) and VARCHAR(length) as casts to
376            STRING in all contexts except for schema definitions. For example, here is the behavior in Spark v3.4.0:
377
378                spark-sql (default)> select cast(1234 as varchar(2));
379                23/06/06 15:51:18 WARN CharVarcharUtils: The Spark cast operator does not support
380                char/varchar type and simply treats them as string type. Please use string type
381                directly to avoid confusion. Otherwise, you can set spark.sql.legacy.charVarcharAsString
382                to true, so that Spark treat them as string type as same as Spark 3.0 and earlier
383
384                1234
385                Time taken: 4.265 seconds, Fetched 1 row(s)
386
387            This shows that Spark doesn't truncate the value into '12', which is inconsistent with
388            what other dialects (e.g. postgres) do, so we need to drop the length to transpile correctly.
389
390            Reference: https://spark.apache.org/docs/latest/sql-ref-datatypes.html
391            """
392            this = super()._parse_types(
393                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
394            )
395
396            if this and not schema:
397                return this.transform(
398                    lambda node: (
399                        node.replace(exp.DataType.build("text"))
400                        if isinstance(node, exp.DataType) and node.is_type("char", "varchar")
401                        else node
402                    ),
403                    copy=False,
404                )
405
406            return this
407
408        def _parse_partition_and_order(
409            self,
410        ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
411            return (
412                (
413                    self._parse_csv(self._parse_conjunction)
414                    if self._match_set({TokenType.PARTITION_BY, TokenType.DISTRIBUTE_BY})
415                    else []
416                ),
417                super()._parse_order(skip_order_token=self._match(TokenType.SORT_BY)),
418            )

Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

Arguments:
  • error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
  • error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
  • max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
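The FUNCTIONS overrides shown in the source above map Hive built-ins onto sqlglot's canonical expressions, for example GET_JSON_OBJECT onto exp.JSONExtractScalar and DATE_SUB onto exp.TsOrDsAdd with a negated increment. A minimal sketch of checking that mapping through the public API (tree shapes may vary slightly between versions):

    import sqlglot
    from sqlglot import exp

    # GET_JSON_OBJECT is registered as exp.JSONExtractScalar in Hive.Parser.FUNCTIONS
    ast = sqlglot.parse_one("SELECT GET_JSON_OBJECT(payload, '$.id') FROM t", read="hive")
    print(ast.find(exp.JSONExtractScalar) is not None)  # True

    # DATE_SUB(d, 7) is parsed as exp.TsOrDsAdd whose increment is multiplied by -1
    ast = sqlglot.parse_one("SELECT DATE_SUB(d, 7) FROM t", read="hive")
    print(ast.find(exp.TsOrDsAdd) is not None)  # True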
LOG_DEFAULTS_TO_LN = True
STRICT_CAST = False
FUNCTIONS = {'ABS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Abs'>>, 'ANONYMOUS_AGG_FUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.AnonymousAggFunc'>>, 'ANY_VALUE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.AnyValue'>>, 'APPROX_DISTINCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxDistinct'>>, 'APPROX_COUNT_DISTINCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxDistinct'>>, 'APPROX_QUANTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxQuantile'>>, 'APPROX_TOP_K': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxTopK'>>, 'ARG_MAX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMax'>>, 'ARGMAX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMax'>>, 'MAX_BY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMax'>>, 'ARG_MIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMin'>>, 'ARGMIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMin'>>, 'MIN_BY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMin'>>, 'ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Array'>>, 'ARRAY_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAgg'>>, 'ARRAY_ALL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAll'>>, 'ARRAY_ANY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAny'>>, 'ARRAY_CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayConcat'>>, 'ARRAY_CAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayConcat'>>, 'ARRAY_CONTAINS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayContains'>>, 'FILTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayFilter'>>, 'ARRAY_FILTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayFilter'>>, 'ARRAY_JOIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayJoin'>>, 'ARRAY_OVERLAPS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayOverlaps'>>, 'ARRAY_SIZE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySize'>>, 'ARRAY_SORT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySort'>>, 'ARRAY_SUM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySum'>>, 'ARRAY_UNION_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayUnionAgg'>>, 'ARRAY_UNIQUE_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayUniqueAgg'>>, 'AVG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Avg'>>, 'CASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Case'>>, 'CAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Cast'>>, 'CAST_TO_STR_TYPE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CastToStrType'>>, 'CBRT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Cbrt'>>, 'CEIL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ceil'>>, 'CEILING': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ceil'>>, 'CHR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Chr'>>, 'CHAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Chr'>>, 'COALESCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'IFNULL': 
<bound method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'NVL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'COLLATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Collate'>>, 'COMBINED_AGG_FUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CombinedAggFunc'>>, 'COMBINED_PARAMETERIZED_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CombinedParameterizedAgg'>>, 'CONCAT': <function Parser.<lambda>>, 'CONCAT_WS': <function Parser.<lambda>>, 'COUNT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Count'>>, 'COUNT_IF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CountIf'>>, 'COUNTIF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CountIf'>>, 'CURRENT_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentDate'>>, 'CURRENT_DATETIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentDatetime'>>, 'CURRENT_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentTime'>>, 'CURRENT_TIMESTAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentTimestamp'>>, 'CURRENT_USER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentUser'>>, 'DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Date'>>, 'DATE_ADD': <function Hive.Parser.<lambda>>, 'DATEDIFF': <function Hive.Parser.<lambda>>, 'DATE_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateDiff'>>, 'DATE_FROM_PARTS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateFromParts'>>, 'DATEFROMPARTS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateFromParts'>>, 'DATE_STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateStrToDate'>>, 'DATE_SUB': <function Hive.Parser.<lambda>>, 'DATE_TO_DATE_STR': <function Parser.<lambda>>, 'DATE_TO_DI': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateToDi'>>, 'DATE_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateTrunc'>>, 'DATETIME_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeAdd'>>, 'DATETIME_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeDiff'>>, 'DATETIME_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeSub'>>, 'DATETIME_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeTrunc'>>, 'DAY': <function Hive.Parser.<lambda>>, 'DAY_OF_MONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfMonth'>>, 'DAYOFMONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfMonth'>>, 'DAY_OF_WEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfWeek'>>, 'DAYOFWEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfWeek'>>, 'DAY_OF_YEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfYear'>>, 'DAYOFYEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfYear'>>, 'DECODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Decode'>>, 'DI_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DiToDate'>>, 'ENCODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Encode'>>, 'EXP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Exp'>>, 'EXPLODE': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.Explode'>>, 'EXPLODE_OUTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ExplodeOuter'>>, 'EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Extract'>>, 'FIRST': <function _parse_ignore_nulls.<locals>._parse>, 'FIRST_VALUE': <function _parse_ignore_nulls.<locals>._parse>, 'FLATTEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Flatten'>>, 'FLOOR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Floor'>>, 'FROM_BASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.FromBase'>>, 'FROM_BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.FromBase64'>>, 'GENERATE_SERIES': <bound method Func.from_arg_list of <class 'sqlglot.expressions.GenerateSeries'>>, 'GREATEST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Greatest'>>, 'GROUP_CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.GroupConcat'>>, 'HEX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Hex'>>, 'HLL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Hll'>>, 'IF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.If'>>, 'IIF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.If'>>, 'INITCAP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Initcap'>>, 'IS_INF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.IsInf'>>, 'ISINF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.IsInf'>>, 'IS_NAN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.IsNan'>>, 'ISNAN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.IsNan'>>, 'J_S_O_N_ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONArray'>>, 'J_S_O_N_ARRAY_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONArrayAgg'>>, 'JSON_ARRAY_CONTAINS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONArrayContains'>>, 'JSONB_EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONBExtract'>>, 'JSONB_EXTRACT_SCALAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONBExtractScalar'>>, 'JSON_EXTRACT': <function parse_extract_json_with_path.<locals>._parser>, 'JSON_EXTRACT_SCALAR': <function parse_extract_json_with_path.<locals>._parser>, 'JSON_FORMAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONFormat'>>, 'J_S_O_N_OBJECT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONObject'>>, 'J_S_O_N_OBJECT_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONObjectAgg'>>, 'J_S_O_N_TABLE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONTable'>>, 'LAG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Lag'>>, 'LAST': <function _parse_ignore_nulls.<locals>._parse>, 'LAST_DAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LastDay'>>, 'LAST_DAY_OF_MONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LastDay'>>, 'LAST_VALUE': <function _parse_ignore_nulls.<locals>._parse>, 'LEAD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Lead'>>, 'LEAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Least'>>, 'LEFT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Left'>>, 'LENGTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Length'>>, 'LEN': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.Length'>>, 'LEVENSHTEIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Levenshtein'>>, 'LN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ln'>>, 'LOG': <function parse_logarithm>, 'LOG10': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Log10'>>, 'LOG2': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Log2'>>, 'LOGICAL_AND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'BOOL_AND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'BOOLAND_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'LOGICAL_OR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'BOOL_OR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'BOOLOR_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'LOWER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Lower'>>, 'LCASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Lower'>>, 'MD5': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MD5'>>, 'MD5_DIGEST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MD5Digest'>>, 'MAP': <function parse_var_map>, 'MAP_FROM_ENTRIES': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MapFromEntries'>>, 'MATCH_AGAINST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MatchAgainst'>>, 'MAX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Max'>>, 'MIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Min'>>, 'MONTH': <function Hive.Parser.<lambda>>, 'MONTHS_BETWEEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MonthsBetween'>>, 'NEXT_VALUE_FOR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.NextValueFor'>>, 'NTH_VALUE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.NthValue'>>, 'NULLIF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Nullif'>>, 'NUMBER_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.NumberToStr'>>, 'NVL2': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Nvl2'>>, 'OPEN_J_S_O_N': <bound method Func.from_arg_list of <class 'sqlglot.expressions.OpenJSON'>>, 'PARAMETERIZED_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ParameterizedAgg'>>, 'PARSE_JSON': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ParseJSON'>>, 'JSON_PARSE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ParseJSON'>>, 'PERCENTILE_CONT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.PercentileCont'>>, 'PERCENTILE_DISC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.PercentileDisc'>>, 'POSEXPLODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Posexplode'>>, 'POSEXPLODE_OUTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.PosexplodeOuter'>>, 'POWER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Pow'>>, 'POW': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Pow'>>, 'PREDICT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Predict'>>, 'QUANTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Quantile'>>, 'RAND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Rand'>>, 'RANDOM': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.Rand'>>, 'RANDN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Randn'>>, 'RANGE_N': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RangeN'>>, 'READ_CSV': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ReadCSV'>>, 'REDUCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Reduce'>>, 'REGEXP_EXTRACT': <function Hive.Parser.<lambda>>, 'REGEXP_I_LIKE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpILike'>>, 'REGEXP_LIKE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpLike'>>, 'REGEXP_REPLACE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpReplace'>>, 'REGEXP_SPLIT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpSplit'>>, 'REPEAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Repeat'>>, 'RIGHT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Right'>>, 'ROUND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Round'>>, 'ROW_NUMBER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RowNumber'>>, 'SHA': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SHA'>>, 'SHA1': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SHA'>>, 'SHA2': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SHA2'>>, 'SAFE_DIVIDE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SafeDivide'>>, 'SORT_ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SortArray'>>, 'SPLIT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpSplit'>>, 'SQRT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Sqrt'>>, 'STANDARD_HASH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StandardHash'>>, 'STAR_MAP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StarMap'>>, 'STARTS_WITH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StartsWith'>>, 'STARTSWITH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StartsWith'>>, 'STDDEV': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Stddev'>>, 'STDDEV_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StddevPop'>>, 'STDDEV_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StddevSamp'>>, 'STR_POSITION': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrPosition'>>, 'STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToDate'>>, 'STR_TO_MAP': <function Hive.Parser.<lambda>>, 'STR_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToTime'>>, 'STR_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToUnix'>>, 'STRUCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Struct'>>, 'STRUCT_EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StructExtract'>>, 'STUFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Stuff'>>, 'INSERT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Stuff'>>, 'SUBSTRING': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Substring'>>, 'SUM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Sum'>>, 'TIME_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeAdd'>>, 'TIME_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeDiff'>>, 'TIME_FROM_PARTS': <bound method 
Func.from_arg_list of <class 'sqlglot.expressions.TimeFromParts'>>, 'TIMEFROMPARTS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeFromParts'>>, 'TIME_STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToDate'>>, 'TIME_STR_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToTime'>>, 'TIME_STR_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToUnix'>>, 'TIME_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeSub'>>, 'TIME_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeToStr'>>, 'TIME_TO_TIME_STR': <function Parser.<lambda>>, 'TIME_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeToUnix'>>, 'TIME_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeTrunc'>>, 'TIMESTAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Timestamp'>>, 'TIMESTAMP_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampAdd'>>, 'TIMESTAMPDIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampDiff'>>, 'TIMESTAMP_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampDiff'>>, 'TIMESTAMP_FROM_PARTS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampFromParts'>>, 'TIMESTAMPFROMPARTS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampFromParts'>>, 'TIMESTAMP_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampSub'>>, 'TIMESTAMP_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampTrunc'>>, 'TO_ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToArray'>>, 'TO_BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToBase64'>>, 'TO_CHAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToChar'>>, 'TO_DAYS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToDays'>>, 'TRANSFORM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Transform'>>, 'TRIM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Trim'>>, 'TRY_CAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TryCast'>>, 'TS_OR_DI_TO_DI': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDiToDi'>>, 'TS_OR_DS_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsAdd'>>, 'TS_OR_DS_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsDiff'>>, 'TS_OR_DS_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsToDate'>>, 'TS_OR_DS_TO_DATE_STR': <function Parser.<lambda>>, 'TS_OR_DS_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsToTime'>>, 'UNHEX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Unhex'>>, 'UNIX_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixDate'>>, 'UNIX_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToStr'>>, 'UNIX_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToTime'>>, 'UNIX_TO_TIME_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToTimeStr'>>, 'UPPER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Upper'>>, 'UCASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Upper'>>, 'VAR_MAP': <function parse_var_map>, 'VARIANCE': <bound method 
Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VARIANCE_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VAR_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VARIANCE_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.VariancePop'>>, 'VAR_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.VariancePop'>>, 'WEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Week'>>, 'WEEK_OF_YEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.WeekOfYear'>>, 'WEEKOFYEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.WeekOfYear'>>, 'WHEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.When'>>, 'X_M_L_TABLE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.XMLTable'>>, 'XOR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Xor'>>, 'YEAR': <function Hive.Parser.<lambda>>, 'GLOB': <function Parser.<lambda>>, 'JSON_EXTRACT_PATH_TEXT': <function parse_extract_json_with_path.<locals>._parser>, 'LIKE': <function parse_like>, 'BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToBase64'>>, 'COLLECT_LIST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAgg'>>, 'COLLECT_SET': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayUniqueAgg'>>, 'DATE_FORMAT': <function Hive.Parser.<lambda>>, 'FROM_UNIXTIME': <function format_time_lambda.<locals>._format_time>, 'GET_JSON_OBJECT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONExtractScalar'>>, 'LOCATE': <function locate_to_strposition>, 'PERCENTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Quantile'>>, 'PERCENTILE_APPROX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxQuantile'>>, 'SIZE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySize'>>, 'TO_DATE': <function format_time_lambda.<locals>._format_time>, 'TO_JSON': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONFormat'>>, 'UNBASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.FromBase64'>>, 'UNIX_TIMESTAMP': <function format_time_lambda.<locals>._format_time>}
NO_PAREN_FUNCTION_PARSERS = {'ANY': <function Parser.<lambda>>, 'CASE': <function Parser.<lambda>>, 'IF': <function Parser.<lambda>>, 'NEXT': <function Parser.<lambda>>, 'TRANSFORM': <function Hive.Parser.<lambda>>}
PROPERTY_PARSERS = {'ALGORITHM': <function Parser.<lambda>>, 'AUTO': <function Parser.<lambda>>, 'AUTO_INCREMENT': <function Parser.<lambda>>, 'BLOCKCOMPRESSION': <function Parser.<lambda>>, 'CHARSET': <function Parser.<lambda>>, 'CHARACTER SET': <function Parser.<lambda>>, 'CHECKSUM': <function Parser.<lambda>>, 'CLUSTER BY': <function Parser.<lambda>>, 'CLUSTERED': <function Parser.<lambda>>, 'COLLATE': <function Parser.<lambda>>, 'COMMENT': <function Parser.<lambda>>, 'CONTAINS': <function Parser.<lambda>>, 'COPY': <function Parser.<lambda>>, 'DATABLOCKSIZE': <function Parser.<lambda>>, 'DEFINER': <function Parser.<lambda>>, 'DETERMINISTIC': <function Parser.<lambda>>, 'DISTKEY': <function Parser.<lambda>>, 'DISTSTYLE': <function Parser.<lambda>>, 'ENGINE': <function Parser.<lambda>>, 'EXECUTE': <function Parser.<lambda>>, 'EXTERNAL': <function Parser.<lambda>>, 'FALLBACK': <function Parser.<lambda>>, 'FORMAT': <function Parser.<lambda>>, 'FREESPACE': <function Parser.<lambda>>, 'HEAP': <function Parser.<lambda>>, 'IMMUTABLE': <function Parser.<lambda>>, 'INHERITS': <function Parser.<lambda>>, 'INPUT': <function Parser.<lambda>>, 'JOURNAL': <function Parser.<lambda>>, 'LANGUAGE': <function Parser.<lambda>>, 'LAYOUT': <function Parser.<lambda>>, 'LIFETIME': <function Parser.<lambda>>, 'LIKE': <function Parser.<lambda>>, 'LOCATION': <function Parser.<lambda>>, 'LOCK': <function Parser.<lambda>>, 'LOCKING': <function Parser.<lambda>>, 'LOG': <function Parser.<lambda>>, 'MATERIALIZED': <function Parser.<lambda>>, 'MERGEBLOCKRATIO': <function Parser.<lambda>>, 'MODIFIES': <function Parser.<lambda>>, 'MULTISET': <function Parser.<lambda>>, 'NO': <function Parser.<lambda>>, 'ON': <function Parser.<lambda>>, 'ORDER BY': <function Parser.<lambda>>, 'OUTPUT': <function Parser.<lambda>>, 'PARTITION': <function Parser.<lambda>>, 'PARTITION BY': <function Parser.<lambda>>, 'PARTITIONED BY': <function Parser.<lambda>>, 'PARTITIONED_BY': <function Parser.<lambda>>, 'PRIMARY KEY': <function Parser.<lambda>>, 'RANGE': <function Parser.<lambda>>, 'READS': <function Parser.<lambda>>, 'REMOTE': <function Parser.<lambda>>, 'RETURNS': <function Parser.<lambda>>, 'ROW': <function Parser.<lambda>>, 'ROW_FORMAT': <function Parser.<lambda>>, 'SAMPLE': <function Parser.<lambda>>, 'SET': <function Parser.<lambda>>, 'SETTINGS': <function Parser.<lambda>>, 'SORTKEY': <function Parser.<lambda>>, 'SOURCE': <function Parser.<lambda>>, 'STABLE': <function Parser.<lambda>>, 'STORED': <function Parser.<lambda>>, 'SYSTEM_VERSIONING': <function Parser.<lambda>>, 'TBLPROPERTIES': <function Parser.<lambda>>, 'TEMP': <function Parser.<lambda>>, 'TEMPORARY': <function Parser.<lambda>>, 'TO': <function Parser.<lambda>>, 'TRANSIENT': <function Parser.<lambda>>, 'TRANSFORM': <function Parser.<lambda>>, 'TTL': <function Parser.<lambda>>, 'USING': <function Parser.<lambda>>, 'VOLATILE': <function Parser.<lambda>>, 'WITH': <function Parser.<lambda>>, 'WITH SERDEPROPERTIES': <function Hive.Parser.<lambda>>}
SHOW_TRIE: Dict = {}
SET_TRIE: Dict = {'GLOBAL': {0: True}, 'LOCAL': {0: True}, 'SESSION': {0: True}, 'TRANSACTION': {0: True}}
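NO_PAREN_FUNCTION_PARSERS routes Hive's TRANSFORM syntax to the _parse_transform method shown above, which returns a plain exp.Transform when there is no USING clause and an exp.QueryTransform when a script is attached. A hedged sketch (the query and script name are made up for illustration):

    import sqlglot
    from sqlglot import exp

    ast = sqlglot.parse_one(
        "SELECT TRANSFORM(a, b) USING 'my_script.py' AS (x STRING, y STRING) FROM t",
        read="hive",
    )
    # With USING present, _parse_transform is expected to build an exp.QueryTransform
    print(ast.find(exp.QueryTransform) is not None)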
Inherited Members
sqlglot.parser.Parser
Parser
NO_PAREN_FUNCTIONS
STRUCT_TYPE_TOKENS
NESTED_TYPE_TOKENS
ENUM_TYPE_TOKENS
AGGREGATE_TYPE_TOKENS
TYPE_TOKENS
SIGNED_TO_UNSIGNED_TYPE_TOKEN
SUBQUERY_PREDICATES
RESERVED_TOKENS
DB_CREATABLES
CREATABLES
ID_VAR_TOKENS
INTERVAL_VARS
TABLE_ALIAS_TOKENS
COMMENT_TABLE_ALIAS_TOKENS
UPDATE_ALIAS_TOKENS
TRIM_TYPES
FUNC_TOKENS
CONJUNCTION
EQUALITY
COMPARISON
BITWISE
TERM
FACTOR
EXPONENT
TIMES
TIMESTAMPS
SET_OPERATIONS
JOIN_METHODS
JOIN_SIDES
JOIN_KINDS
JOIN_HINTS
LAMBDAS
COLUMN_OPERATORS
EXPRESSION_PARSERS
STATEMENT_PARSERS
UNARY_PARSERS
PRIMARY_PARSERS
PLACEHOLDER_PARSERS
RANGE_PARSERS
CONSTRAINT_PARSERS
ALTER_PARSERS
SCHEMA_UNNAMED_CONSTRAINTS
INVALID_FUNC_NAME_TOKENS
FUNCTIONS_WITH_ALIASED_ARGS
FUNCTION_PARSERS
QUERY_MODIFIER_PARSERS
SET_PARSERS
SHOW_PARSERS
TYPE_LITERAL_PARSERS
MODIFIABLES
DDL_SELECT_TOKENS
PRE_VOLATILE_TOKENS
TRANSACTION_KIND
TRANSACTION_CHARACTERISTICS
INSERT_ALTERNATIVES
CLONE_KEYWORDS
HISTORICAL_DATA_KIND
OPCLASS_FOLLOW_KEYWORDS
OPTYPE_FOLLOW_TOKENS
TABLE_INDEX_HINT_TOKENS
WINDOW_ALIAS_TOKENS
WINDOW_BEFORE_PAREN_TOKENS
WINDOW_SIDES
JSON_KEY_VALUE_SEPARATOR_TOKENS
FETCH_TOKENS
ADD_CONSTRAINT_TOKENS
DISTINCT_TOKENS
NULL_TOKENS
UNNEST_OFFSET_ALIAS_TOKENS
PREFIXED_PIVOT_COLUMNS
IDENTIFY_PIVOT_STRINGS
ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
TABLESAMPLE_CSV
SET_REQUIRES_ASSIGNMENT_DELIMITER
TRIM_PATTERN_FIRST
STRING_ALIASES
MODIFIERS_ATTACHED_TO_UNION
UNION_MODIFIERS
NO_PAREN_IF_COMMANDS
error_level
error_message_context
max_errors
dialect
reset
parse
parse_into
check_errors
raise_error
expression
validate_expression
errors
sql
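As the _parse_types docstring above explains, casts to CHAR(length) and VARCHAR(length) are treated as casts to STRING everywhere except schema definitions. A small sketch of what that looks like end to end (output comments describe the expected shape, not a guaranteed byte-for-byte result):

    import sqlglot

    # Outside a schema the length is dropped and the type collapses to STRING
    print(sqlglot.transpile("SELECT CAST(1234 AS VARCHAR(2))", read="hive", write="hive")[0])
    # expected along the lines of: SELECT CAST(1234 AS STRING)

    # Inside a schema definition the declared VARCHAR(2) is kept as-is
    print(sqlglot.transpile("CREATE TABLE t (c VARCHAR(2))", read="hive", write="hive")[0])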
class Hive.Generator(sqlglot.generator.Generator):
420    class Generator(generator.Generator):
421        LIMIT_FETCH = "LIMIT"
422        TABLESAMPLE_WITH_METHOD = False
423        JOIN_HINTS = False
424        TABLE_HINTS = False
425        QUERY_HINTS = False
426        INDEX_ON = "ON TABLE"
427        EXTRACT_ALLOWS_QUOTES = False
428        NVL2_SUPPORTED = False
429        LAST_DAY_SUPPORTS_DATE_PART = False
430        JSON_PATH_SINGLE_QUOTE_ESCAPE = True
431
432        EXPRESSIONS_WITHOUT_NESTED_CTES = {
433            exp.Insert,
434            exp.Select,
435            exp.Subquery,
436            exp.Union,
437        }
438
439        SUPPORTED_JSON_PATH_PARTS = {
440            exp.JSONPathKey,
441            exp.JSONPathRoot,
442            exp.JSONPathSubscript,
443            exp.JSONPathWildcard,
444        }
445
446        TYPE_MAPPING = {
447            **generator.Generator.TYPE_MAPPING,
448            exp.DataType.Type.BIT: "BOOLEAN",
449            exp.DataType.Type.DATETIME: "TIMESTAMP",
450            exp.DataType.Type.TEXT: "STRING",
451            exp.DataType.Type.TIME: "TIMESTAMP",
452            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
453            exp.DataType.Type.VARBINARY: "BINARY",
454        }
455
456        TRANSFORMS = {
457            **generator.Generator.TRANSFORMS,
458            exp.Group: transforms.preprocess([transforms.unalias_group]),
459            exp.Select: transforms.preprocess(
460                [
461                    transforms.eliminate_qualify,
462                    transforms.eliminate_distinct_on,
463                    transforms.unnest_to_explode,
464                ]
465            ),
466            exp.Property: _property_sql,
467            exp.AnyValue: rename_func("FIRST"),
468            exp.ApproxDistinct: approx_count_distinct_sql,
469            exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
470            exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
471            exp.ArrayConcat: rename_func("CONCAT"),
472            exp.ArrayJoin: lambda self, e: self.func("CONCAT_WS", e.expression, e.this),
473            exp.ArraySize: rename_func("SIZE"),
474            exp.ArraySort: _array_sort_sql,
475            exp.With: no_recursive_cte_sql,
476            exp.DateAdd: _add_date_sql,
477            exp.DateDiff: _date_diff_sql,
478            exp.DateStrToDate: datestrtodate_sql,
479            exp.DateSub: _add_date_sql,
480            exp.DateToDi: lambda self,
481            e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Hive.DATEINT_FORMAT}) AS INT)",
482            exp.DiToDate: lambda self,
483            e: f"TO_DATE(CAST({self.sql(e, 'this')} AS STRING), {Hive.DATEINT_FORMAT})",
484            exp.FileFormatProperty: lambda self,
485            e: f"STORED AS {self.sql(e, 'this') if isinstance(e.this, exp.InputOutputFormat) else e.name.upper()}",
486            exp.FromBase64: rename_func("UNBASE64"),
487            exp.If: if_sql(),
488            exp.ILike: no_ilike_sql,
489            exp.IsNan: rename_func("ISNAN"),
490            exp.JSONExtract: rename_func("GET_JSON_OBJECT"),
491            exp.JSONExtractScalar: rename_func("GET_JSON_OBJECT"),
492            exp.JSONFormat: _json_format_sql,
493            exp.Left: left_to_substring_sql,
494            exp.Map: var_map_sql,
495            exp.Max: max_or_greatest,
496            exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
497            exp.Min: min_or_least,
498            exp.MonthsBetween: lambda self, e: self.func("MONTHS_BETWEEN", e.this, e.expression),
499            exp.NotNullColumnConstraint: lambda self, e: (
500                "" if e.args.get("allow_null") else "NOT NULL"
501            ),
502            exp.VarMap: var_map_sql,
503            exp.Create: preprocess(
504                [
505                    remove_unique_constraints,
506                    ctas_with_tmp_tables_to_create_tmp_view,
507                    move_schema_columns_to_partitioned_by,
508                ]
509            ),
510            exp.Quantile: rename_func("PERCENTILE"),
511            exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"),
512            exp.RegexpExtract: regexp_extract_sql,
513            exp.RegexpReplace: regexp_replace_sql,
514            exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"),
515            exp.RegexpSplit: rename_func("SPLIT"),
516            exp.Right: right_to_substring_sql,
517            exp.SafeDivide: no_safe_divide_sql,
518            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
519            exp.ArrayUniqueAgg: rename_func("COLLECT_SET"),
520            exp.Split: lambda self,
521            e: f"SPLIT({self.sql(e, 'this')}, CONCAT('\\\\Q', {self.sql(e, 'expression')}))",
522            exp.StrPosition: strposition_to_locate_sql,
523            exp.StrToDate: _str_to_date_sql,
524            exp.StrToTime: _str_to_time_sql,
525            exp.StrToUnix: _str_to_unix_sql,
526            exp.StructExtract: struct_extract_sql,
527            exp.TimeStrToDate: rename_func("TO_DATE"),
528            exp.TimeStrToTime: timestrtotime_sql,
529            exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"),
530            exp.TimeToStr: _time_to_str,
531            exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"),
532            exp.ToBase64: rename_func("BASE64"),
533            exp.TsOrDiToDi: lambda self,
534            e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS STRING), '-', ''), 1, 8) AS INT)",
535            exp.TsOrDsAdd: _add_date_sql,
536            exp.TsOrDsDiff: _date_diff_sql,
537            exp.TsOrDsToDate: _to_date_sql,
538            exp.TryCast: no_trycast_sql,
539            exp.UnixToStr: lambda self, e: self.func(
540                "FROM_UNIXTIME", e.this, time_format("hive")(self, e)
541            ),
542            exp.UnixToTime: rename_func("FROM_UNIXTIME"),
543            exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"),
544            exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'this')}",
545            exp.SerdeProperties: lambda self, e: self.properties(e, prefix="WITH SERDEPROPERTIES"),
546            exp.NumberToStr: rename_func("FORMAT_NUMBER"),
547            exp.National: lambda self, e: self.national_sql(e, prefix=""),
548            exp.ClusteredColumnConstraint: lambda self,
549            e: f"({self.expressions(e, 'this', indent=False)})",
550            exp.NonClusteredColumnConstraint: lambda self,
551            e: f"({self.expressions(e, 'this', indent=False)})",
552            exp.NotForReplicationColumnConstraint: lambda self, e: "",
553            exp.OnProperty: lambda self, e: "",
554            exp.PrimaryKeyColumnConstraint: lambda self, e: "PRIMARY KEY",
555        }
556
557        PROPERTIES_LOCATION = {
558            **generator.Generator.PROPERTIES_LOCATION,
559            exp.FileFormatProperty: exp.Properties.Location.POST_SCHEMA,
560            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
561            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
562            exp.WithDataProperty: exp.Properties.Location.UNSUPPORTED,
563        }
564
565        def _jsonpathkey_sql(self, expression: exp.JSONPathKey) -> str:
566            if isinstance(expression.this, exp.JSONPathWildcard):
567                self.unsupported("Unsupported wildcard in JSONPathKey expression")
568                return ""
569
570            return super()._jsonpathkey_sql(expression)
571
572        def parameter_sql(self, expression: exp.Parameter) -> str:
573            this = self.sql(expression, "this")
574            expression_sql = self.sql(expression, "expression")
575
576            parent = expression.parent
577            this = f"{this}:{expression_sql}" if expression_sql else this
578
579            if isinstance(parent, exp.EQ) and isinstance(parent.parent, exp.SetItem):
580                # We need to produce SET key = value instead of SET ${key} = value
581                return this
582
583            return f"${{{this}}}"
584
585        def schema_sql(self, expression: exp.Schema) -> str:
586            for ordered in expression.find_all(exp.Ordered):
587                if ordered.args.get("desc") is False:
588                    ordered.set("desc", None)
589
590            return super().schema_sql(expression)
591
592        def constraint_sql(self, expression: exp.Constraint) -> str:
593            for prop in list(expression.find_all(exp.Properties)):
594                prop.pop()
595
596            this = self.sql(expression, "this")
597            expressions = self.expressions(expression, sep=" ", flat=True)
598            return f"CONSTRAINT {this} {expressions}"
599
600        def rowformatserdeproperty_sql(self, expression: exp.RowFormatSerdeProperty) -> str:
601            serde_props = self.sql(expression, "serde_properties")
602            serde_props = f" {serde_props}" if serde_props else ""
603            return f"ROW FORMAT SERDE {self.sql(expression, 'this')}{serde_props}"
604
605        def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
606            return self.func(
607                "COLLECT_LIST",
608                expression.this.this if isinstance(expression.this, exp.Order) else expression.this,
609            )
610
611        def with_properties(self, properties: exp.Properties) -> str:
612            return self.properties(properties, prefix=self.seg("TBLPROPERTIES"))
613
614        def datatype_sql(self, expression: exp.DataType) -> str:
615            if (
616                expression.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR)
617                and not expression.expressions
618            ):
619                expression = exp.DataType.build("text")
620            elif expression.is_type(exp.DataType.Type.TEXT) and expression.expressions:
621                expression.set("this", exp.DataType.Type.VARCHAR)
622            elif expression.this in exp.DataType.TEMPORAL_TYPES:
623                expression = exp.DataType.build(expression.this)
624            elif expression.is_type("float"):
625                size_expression = expression.find(exp.DataTypeParam)
626                if size_expression:
627                    size = int(size_expression.name)
628                    expression = (
629                        exp.DataType.build("float") if size <= 32 else exp.DataType.build("double")
630                    )
631
632            return super().datatype_sql(expression)
633
634        def version_sql(self, expression: exp.Version) -> str:
635            sql = super().version_sql(expression)
636            return sql.replace("FOR ", "", 1)

Generator converts a given syntax tree to the corresponding SQL string.

Arguments:
  • pretty: Whether or not to format the produced SQL string. Default: False.
  • identify: Determines when an identifier should be quoted. Possible values are:
      ◦ False (default): Never quote, except in cases where it's mandatory by the dialect.
      ◦ True or 'always': Always quote.
      ◦ 'safe': Only quote identifiers that are case insensitive.
  • normalize: Whether or not to normalize identifiers to lowercase. Default: False.
  • pad: Determines the pad size in a formatted string. Default: 2.
  • indent: Determines the indentation size in a formatted string. Default: 2.
  • normalize_functions: Whether or not to normalize all function names. Possible values are:
      ◦ "upper" or True (default): Convert names to uppercase.
      ◦ "lower": Convert names to lowercase.
      ◦ False: Disables function name normalization.
  • unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
  • max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
  • leading_comma: Determines whether or not the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
  • max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
  • comments: Whether or not to preserve comments in the output SQL code. Default: True
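
These options are rarely set on Hive.Generator directly; sqlglot.transpile forwards them to the generator. A minimal sketch, assuming the standard sqlglot API (the table and column names are made up, and the commented output is indicative only):

    import sqlglot

    sql = "SELECT col_a, col_b FROM my_table WHERE col_a > 1"

    # pretty, identify and normalize are the generator arguments documented above.
    print(
        sqlglot.transpile(
            sql,
            read="hive",
            write="hive",
            pretty=True,      # format the output over multiple lines
            identify=True,    # quote every identifier (backticks in Hive)
            normalize=False,  # leave identifier casing untouched
        )[0]
    )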
LIMIT_FETCH = 'LIMIT'
TABLESAMPLE_WITH_METHOD = False
JOIN_HINTS = False
TABLE_HINTS = False
QUERY_HINTS = False
INDEX_ON = 'ON TABLE'
EXTRACT_ALLOWS_QUOTES = False
NVL2_SUPPORTED = False
LAST_DAY_SUPPORTS_DATE_PART = False
JSON_PATH_SINGLE_QUOTE_ESCAPE = True
EXPRESSIONS_WITHOUT_NESTED_CTES = {<class 'sqlglot.expressions.Subquery'>, <class 'sqlglot.expressions.Union'>, <class 'sqlglot.expressions.Insert'>, <class 'sqlglot.expressions.Select'>}
TYPE_MAPPING = {<Type.NCHAR: 'NCHAR'>: 'CHAR', <Type.NVARCHAR: 'NVARCHAR'>: 'VARCHAR', <Type.MEDIUMTEXT: 'MEDIUMTEXT'>: 'TEXT', <Type.LONGTEXT: 'LONGTEXT'>: 'TEXT', <Type.TINYTEXT: 'TINYTEXT'>: 'TEXT', <Type.MEDIUMBLOB: 'MEDIUMBLOB'>: 'BLOB', <Type.LONGBLOB: 'LONGBLOB'>: 'BLOB', <Type.TINYBLOB: 'TINYBLOB'>: 'BLOB', <Type.INET: 'INET'>: 'INET', <Type.BIT: 'BIT'>: 'BOOLEAN', <Type.DATETIME: 'DATETIME'>: 'TIMESTAMP', <Type.TEXT: 'TEXT'>: 'STRING', <Type.TIME: 'TIME'>: 'TIMESTAMP', <Type.TIMESTAMPTZ: 'TIMESTAMPTZ'>: 'TIMESTAMP', <Type.VARBINARY: 'VARBINARY'>: 'BINARY'}
TRANSFORMS = {<class 'sqlglot.expressions.JSONPathKey'>: <function <lambda>>, <class 'sqlglot.expressions.JSONPathRoot'>: <function <lambda>>, <class 'sqlglot.expressions.JSONPathSubscript'>: <function <lambda>>, <class 'sqlglot.expressions.JSONPathWildcard'>: <function <lambda>>, <class 'sqlglot.expressions.AutoRefreshProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CaseSpecificColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CharacterSetColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CharacterSetProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CheckColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ClusteredColumnConstraint'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.CollateColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CommentColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CopyGrantsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.DateAdd'>: <function _add_date_sql>, <class 'sqlglot.expressions.DateFormatColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.DefaultColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.EncodeColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ExecuteAsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ExternalProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.HeapProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.InheritsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.InlineLengthColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.InputModelProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.IntervalSpan'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.LanguageProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.LocationProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.LogProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.MaterializedProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.NonClusteredColumnConstraint'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.NoPrimaryIndexProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.NotForReplicationColumnConstraint'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.OnCommitProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.OnProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.OnUpdateColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.OutputModelProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.PathColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.RemoteWithConnectionModelProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ReturnsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SampleProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SetConfigProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SetProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SettingsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SqlReadWriteProperty'>: <function Generator.<lambda>>, <class 
'sqlglot.expressions.SqlSecurityProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.StabilityProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TemporaryProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TitleColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.Timestamp'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ToTableProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TransformModelProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TransientProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.UppercaseColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.VarMap'>: <function var_map_sql>, <class 'sqlglot.expressions.VolatileProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.WithJournalTableProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.Group'>: <function preprocess.<locals>._to_sql>, <class 'sqlglot.expressions.Select'>: <function preprocess.<locals>._to_sql>, <class 'sqlglot.expressions.Property'>: <function _property_sql>, <class 'sqlglot.expressions.AnyValue'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ApproxDistinct'>: <function approx_count_distinct_sql>, <class 'sqlglot.expressions.ArgMax'>: <function arg_max_or_min_no_count.<locals>._arg_max_or_min_sql>, <class 'sqlglot.expressions.ArgMin'>: <function arg_max_or_min_no_count.<locals>._arg_max_or_min_sql>, <class 'sqlglot.expressions.ArrayConcat'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ArrayJoin'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.ArraySize'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ArraySort'>: <function _array_sort_sql>, <class 'sqlglot.expressions.With'>: <function no_recursive_cte_sql>, <class 'sqlglot.expressions.DateDiff'>: <function _date_diff_sql>, <class 'sqlglot.expressions.DateStrToDate'>: <function datestrtodate_sql>, <class 'sqlglot.expressions.DateSub'>: <function _add_date_sql>, <class 'sqlglot.expressions.DateToDi'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.DiToDate'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.FileFormatProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.FromBase64'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.If'>: <function if_sql.<locals>._if_sql>, <class 'sqlglot.expressions.ILike'>: <function no_ilike_sql>, <class 'sqlglot.expressions.IsNan'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.JSONExtract'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.JSONExtractScalar'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.JSONFormat'>: <function _json_format_sql>, <class 'sqlglot.expressions.Left'>: <function left_to_substring_sql>, <class 'sqlglot.expressions.Map'>: <function var_map_sql>, <class 'sqlglot.expressions.Max'>: <function max_or_greatest>, <class 'sqlglot.expressions.MD5Digest'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.Min'>: <function min_or_least>, <class 'sqlglot.expressions.MonthsBetween'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.NotNullColumnConstraint'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.Create'>: <function preprocess.<locals>._to_sql>, <class 'sqlglot.expressions.Quantile'>: <function 
rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ApproxQuantile'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.RegexpExtract'>: <function regexp_extract_sql>, <class 'sqlglot.expressions.RegexpReplace'>: <function regexp_replace_sql>, <class 'sqlglot.expressions.RegexpLike'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.RegexpSplit'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.Right'>: <function right_to_substring_sql>, <class 'sqlglot.expressions.SafeDivide'>: <function no_safe_divide_sql>, <class 'sqlglot.expressions.SchemaCommentProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.ArrayUniqueAgg'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.Split'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.StrPosition'>: <function strposition_to_locate_sql>, <class 'sqlglot.expressions.StrToDate'>: <function _str_to_date_sql>, <class 'sqlglot.expressions.StrToTime'>: <function _str_to_time_sql>, <class 'sqlglot.expressions.StrToUnix'>: <function _str_to_unix_sql>, <class 'sqlglot.expressions.StructExtract'>: <function struct_extract_sql>, <class 'sqlglot.expressions.TimeStrToDate'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TimeStrToTime'>: <function timestrtotime_sql>, <class 'sqlglot.expressions.TimeStrToUnix'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TimeToStr'>: <function _time_to_str>, <class 'sqlglot.expressions.TimeToUnix'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ToBase64'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TsOrDiToDi'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.TsOrDsAdd'>: <function _add_date_sql>, <class 'sqlglot.expressions.TsOrDsDiff'>: <function _date_diff_sql>, <class 'sqlglot.expressions.TsOrDsToDate'>: <function _to_date_sql>, <class 'sqlglot.expressions.TryCast'>: <function no_trycast_sql>, <class 'sqlglot.expressions.UnixToStr'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.UnixToTime'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.UnixToTimeStr'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.PartitionedByProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.SerdeProperties'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.NumberToStr'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.National'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.PrimaryKeyColumnConstraint'>: <function Hive.Generator.<lambda>>}
PROPERTIES_LOCATION = {<class 'sqlglot.expressions.AlgorithmProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.AutoIncrementProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.AutoRefreshProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.BlockCompressionProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.CharacterSetProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ChecksumProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.CollateProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.CopyGrantsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.Cluster'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ClusteredByProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DataBlocksizeProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.DefinerProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.DictRange'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DictProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DistKeyProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DistStyleProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.EngineProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ExecuteAsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ExternalProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.FallbackProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.FileFormatProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.FreespaceProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.HeapProperty'>: <Location.POST_WITH: 'POST_WITH'>, <class 'sqlglot.expressions.InheritsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.InputModelProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.IsolatedLoadingProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.JournalProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.LanguageProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LikeProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LocationProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LockingProperty'>: <Location.POST_ALIAS: 'POST_ALIAS'>, <class 'sqlglot.expressions.LogProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.MaterializedProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.MergeBlockRatioProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.NoPrimaryIndexProperty'>: <Location.POST_EXPRESSION: 'POST_EXPRESSION'>, <class 'sqlglot.expressions.OnProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.OnCommitProperty'>: <Location.POST_EXPRESSION: 'POST_EXPRESSION'>, <class 'sqlglot.expressions.Order'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.OutputModelProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.PartitionedByProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.PartitionedOfProperty'>: 
<Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.PrimaryKey'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.Property'>: <Location.POST_WITH: 'POST_WITH'>, <class 'sqlglot.expressions.RemoteWithConnectionModelProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ReturnsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.RowFormatProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.RowFormatDelimitedProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.RowFormatSerdeProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SampleProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SchemaCommentProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SerdeProperties'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.Set'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SettingsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SetProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.SetConfigProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SortKeyProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SqlReadWriteProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SqlSecurityProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.StabilityProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.TemporaryProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.ToTableProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.TransientProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.TransformModelProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.MergeTreeTTL'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.VolatileProperty'>: <Location.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.WithDataProperty'>: <Location.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.WithJournalTableProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.WithSystemVersioningProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>}
def parameter_sql(self, expression: sqlglot.expressions.Parameter) -> str:
572        def parameter_sql(self, expression: exp.Parameter) -> str:
573            this = self.sql(expression, "this")
574            expression_sql = self.sql(expression, "expression")
575
576            parent = expression.parent
577            this = f"{this}:{expression_sql}" if expression_sql else this
578
579            if isinstance(parent, exp.EQ) and isinstance(parent.parent, exp.SetItem):
580                # We need to produce SET key = value instead of SET ${key} = value
581                return this
582
583            return f"${{{this}}}"
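
A small usage sketch, assuming ${...} variables parse as Hive parameters (the variable names are made up; outputs are indicative): parameters keep their ${} wrapper, except when used as the key of a SET statement, as the SetItem branch above shows.

    import sqlglot

    # A ${...} parameter inside a query keeps its wrapper.
    print(sqlglot.transpile("SELECT ${hiveconf:x}", read="hive", write="hive")[0])
    # roughly: SELECT ${hiveconf:x}

    # As the key of SET, the bare name is produced instead of ${name}.
    print(sqlglot.transpile("SET hiveconf:x = 1", read="hive", write="hive")[0])
    # roughly: SET hiveconf:x = 1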
def schema_sql(self, expression: sqlglot.expressions.Schema) -> str:
585        def schema_sql(self, expression: exp.Schema) -> str:
586            for ordered in expression.find_all(exp.Ordered):
587                if ordered.args.get("desc") is False:
588                    ordered.set("desc", None)
589
590            return super().schema_sql(expression)
def constraint_sql(self, expression: sqlglot.expressions.Constraint) -> str:
592        def constraint_sql(self, expression: exp.Constraint) -> str:
593            for prop in list(expression.find_all(exp.Properties)):
594                prop.pop()
595
596            this = self.sql(expression, "this")
597            expressions = self.expressions(expression, sep=" ", flat=True)
598            return f"CONSTRAINT {this} {expressions}"
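
A hedged sketch of the output, building an exp.Constraint by hand rather than parsing one (the constraint name and condition are made up for illustration):

    from sqlglot import exp

    # Any attached Properties are dropped; the remaining pieces are joined as
    # CONSTRAINT <name> <expressions>.
    constraint = exp.Constraint(
        this=exp.to_identifier("positive_x"),
        expressions=[exp.CheckColumnConstraint(this=exp.condition("x > 0"))],
    )
    print(constraint.sql(dialect="hive"))
    # roughly: CONSTRAINT positive_x CHECK (x > 0)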
def rowformatserdeproperty_sql(self, expression: sqlglot.expressions.RowFormatSerdeProperty) -> str:
600        def rowformatserdeproperty_sql(self, expression: exp.RowFormatSerdeProperty) -> str:
601            serde_props = self.sql(expression, "serde_properties")
602            serde_props = f" {serde_props}" if serde_props else ""
603            return f"ROW FORMAT SERDE {self.sql(expression, 'this')}{serde_props}"
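
An illustrative round trip, assuming the SERDE class name shown here (any real class behaves the same way); the output is indicative:

    import sqlglot

    ddl = (
        "CREATE TABLE t (x INT) "
        "ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde' "
        "WITH SERDEPROPERTIES ('separatorChar'=',')"
    )
    # The SERDE class and, when present, its SERDEPROPERTIES are emitted as above.
    print(sqlglot.transpile(ddl, read="hive", write="hive")[0])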
def arrayagg_sql(self, expression: sqlglot.expressions.ArrayAgg) -> str:
605        def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
606            return self.func(
607                "COLLECT_LIST",
608                expression.this.this if isinstance(expression.this, exp.Order) else expression.this,
609            )
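
A usage sketch (output indicative): ARRAY_AGG coming from another dialect is rendered with Hive's COLLECT_LIST, and an ORDER BY inside the aggregate is dropped, keeping only the aggregated expression.

    import sqlglot

    print(sqlglot.transpile("SELECT ARRAY_AGG(x) FROM t", write="hive")[0])
    # roughly: SELECT COLLECT_LIST(x) FROM t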
def with_properties(self, properties: sqlglot.expressions.Properties) -> str:
611        def with_properties(self, properties: exp.Properties) -> str:
612            return self.properties(properties, prefix=self.seg("TBLPROPERTIES"))
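
An illustrative round trip (output indicative): generic table properties are emitted after the schema under a TBLPROPERTIES clause.

    import sqlglot

    ddl = "CREATE TABLE t (x INT) TBLPROPERTIES ('transactional'='true')"
    print(sqlglot.transpile(ddl, read="hive", write="hive")[0])
    # roughly the same statement back, with the properties kept under TBLPROPERTIES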
def datatype_sql(self, expression: sqlglot.expressions.DataType) -> str:
614        def datatype_sql(self, expression: exp.DataType) -> str:
615            if (
616                expression.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR)
617                and not expression.expressions
618            ):
619                expression = exp.DataType.build("text")
620            elif expression.is_type(exp.DataType.Type.TEXT) and expression.expressions:
621                expression.set("this", exp.DataType.Type.VARCHAR)
622            elif expression.this in exp.DataType.TEMPORAL_TYPES:
623                expression = exp.DataType.build(expression.this)
624            elif expression.is_type("float"):
625                size_expression = expression.find(exp.DataTypeParam)
626                if size_expression:
627                    size = int(size_expression.name)
628                    expression = (
629                        exp.DataType.build("float") if size <= 32 else exp.DataType.build("double")
630                    )
631
632            return super().datatype_sql(expression)
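
A sketch of the resulting type mapping (output indicative): unsized VARCHAR/NVARCHAR and TEXT come out as STRING, and a FLOAT whose precision exceeds 32 is widened to DOUBLE, matching the branches above.

    import sqlglot

    print(sqlglot.transpile("CREATE TABLE t (a VARCHAR, b TEXT, c FLOAT(53))", write="hive")[0])
    # roughly: CREATE TABLE t (a STRING, b STRING, c DOUBLE)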
def version_sql(self, expression: sqlglot.expressions.Version) -> str:
634        def version_sql(self, expression: exp.Version) -> str:
635            sql = super().version_sql(expression)
636            return sql.replace("FOR ", "", 1)
SELECT_KINDS: Tuple[str, ...] = ()
Inherited Members
sqlglot.generator.Generator
Generator
NULL_ORDERING_SUPPORTED
IGNORE_NULLS_IN_FUNC
LOCKING_READS_SUPPORTED
EXPLICIT_UNION
WRAP_DERIVED_VALUES
CREATE_FUNCTION_RETURN_AS
MATCHED_BY_SOURCE
SINGLE_STRING_INTERVAL
INTERVAL_ALLOWS_PLURAL_FORM
LIMIT_ONLY_LITERALS
RENAME_TABLE_WITH_DB
GROUPINGS_SEP
QUERY_HINT_SEP
IS_BOOL_ALLOWED
DUPLICATE_KEY_UPDATE_WITH_SET
LIMIT_IS_TOP
RETURNING_END
COLUMN_JOIN_MARKS_SUPPORTED
TZ_TO_WITH_TIME_ZONE
VALUES_AS_TABLE
ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
UNNEST_WITH_ORDINALITY
AGGREGATE_FILTER_SUPPORTED
SEMI_ANTI_JOIN_WITH_SIDE
COMPUTED_COLUMN_WITH_TYPE
SUPPORTS_TABLE_COPY
TABLESAMPLE_REQUIRES_PARENS
TABLESAMPLE_SIZE_IS_ROWS
TABLESAMPLE_KEYWORDS
TABLESAMPLE_SEED_KEYWORD
COLLATE_IS_FUNC
DATA_TYPE_SPECIFIERS_ALLOWED
ENSURE_BOOLS
CTE_RECURSIVE_KEYWORD_REQUIRED
SUPPORTS_SINGLE_ARG_CONCAT
SUPPORTS_TABLE_ALIAS_COLUMNS
UNPIVOT_ALIASES_ARE_IDENTIFIERS
JSON_KEY_VALUE_PAIR_SEP
INSERT_OVERWRITE
SUPPORTS_SELECT_INTO
SUPPORTS_UNLOGGED_TABLES
SUPPORTS_CREATE_TABLE_LIKE
LIKE_PROPERTY_INSIDE_SCHEMA
MULTI_ARG_DISTINCT
JSON_TYPE_REQUIRED_FOR_EXTRACTION
JSON_PATH_BRACKETED_KEY_SUPPORTED
STAR_MAPPING
TIME_PART_SINGULARS
TOKEN_MAPPING
STRUCT_DELIMITER
PARAMETER_TOKEN
RESERVED_KEYWORDS
WITH_SEPARATED_COMMENTS
EXCLUDE_COMMENTS
UNWRAPPED_INTERVAL_VALUES
KEY_VALUE_DEFINITIONS
SENTINEL_LINE_BREAK
pretty
identify
normalize
pad
unsupported_level
max_unsupported
leading_comma
max_text_width
comments
dialect
normalize_functions
unsupported_messages
generate
preprocess
unsupported
sep
seg
pad_comment
maybe_comment
wrap
no_identify
normalize_func
indent
sql
uncache_sql
cache_sql
characterset_sql
column_sql
columnposition_sql
columndef_sql
columnconstraint_sql
computedcolumnconstraint_sql
autoincrementcolumnconstraint_sql
compresscolumnconstraint_sql
generatedasidentitycolumnconstraint_sql
generatedasrowcolumnconstraint_sql
periodforsystemtimeconstraint_sql
notnullcolumnconstraint_sql
transformcolumnconstraint_sql
primarykeycolumnconstraint_sql
uniquecolumnconstraint_sql
createable_sql
create_sql
clone_sql
describe_sql
heredoc_sql
prepend_ctes
with_sql
cte_sql
tablealias_sql
bitstring_sql
hexstring_sql
bytestring_sql
unicodestring_sql
rawstring_sql
datatypeparam_sql
directory_sql
delete_sql
drop_sql
except_sql
except_op
fetch_sql
filter_sql
hint_sql
index_sql
identifier_sql
inputoutputformat_sql
national_sql
partition_sql
properties_sql
root_properties
properties
locate_properties
property_name
property_sql
likeproperty_sql
fallbackproperty_sql
journalproperty_sql
freespaceproperty_sql
checksumproperty_sql
mergeblockratioproperty_sql
datablocksizeproperty_sql
blockcompressionproperty_sql
isolatedloadingproperty_sql
partitionboundspec_sql
partitionedofproperty_sql
lockingproperty_sql
withdataproperty_sql
withsystemversioningproperty_sql
insert_sql
intersect_sql
intersect_op
introducer_sql
kill_sql
pseudotype_sql
objectidentifier_sql
onconflict_sql
returning_sql
rowformatdelimitedproperty_sql
withtablehint_sql
indextablehint_sql
historicaldata_sql
table_sql
tablesample_sql
pivot_sql
tuple_sql
update_sql
values_sql
var_sql
into_sql
from_sql
group_sql
having_sql
connect_sql
prior_sql
join_sql
lambda_sql
lateral_op
lateral_sql
limit_sql
offset_sql
setitem_sql
set_sql
pragma_sql
lock_sql
literal_sql
escape_str
loaddata_sql
null_sql
boolean_sql
order_sql
withfill_sql
cluster_sql
distribute_sql
sort_sql
ordered_sql
matchrecognize_sql
query_modifiers
offset_limit_modifiers
after_having_modifiers
after_limit_modifiers
select_sql
schema_columns_sql
star_sql
sessionparameter_sql
placeholder_sql
subquery_sql
qualify_sql
union_sql
union_op
unnest_sql
where_sql
window_sql
partition_by_sql
windowspec_sql
withingroup_sql
between_sql
bracket_sql
all_sql
any_sql
exists_sql
case_sql
nextvaluefor_sql
extract_sql
trim_sql
convert_concat_args
concat_sql
concatws_sql
check_sql
foreignkey_sql
primarykey_sql
if_sql
matchagainst_sql
jsonkeyvalue_sql
jsonpath_sql
json_path_part
formatjson_sql
jsonobject_sql
jsonobjectagg_sql
jsonarray_sql
jsonarrayagg_sql
jsoncolumndef_sql
jsonschema_sql
jsontable_sql
openjsoncolumndef_sql
openjson_sql
in_sql
in_unnest_op
interval_sql
return_sql
reference_sql
anonymous_sql
paren_sql
neg_sql
not_sql
alias_sql
pivotalias_sql
aliases_sql
atindex_sql
attimezone_sql
fromtimezone_sql
add_sql
and_sql
xor_sql
connector_sql
bitwiseand_sql
bitwiseleftshift_sql
bitwisenot_sql
bitwiseor_sql
bitwiserightshift_sql
bitwisexor_sql
cast_sql
currentdate_sql
currenttimestamp_sql
collate_sql
command_sql
comment_sql
mergetreettlaction_sql
mergetreettl_sql
transaction_sql
commit_sql
rollback_sql
altercolumn_sql
renametable_sql
renamecolumn_sql
altertable_sql
add_column_sql
droppartition_sql
addconstraint_sql
distinct_sql
ignorenulls_sql
respectnulls_sql
havingmax_sql
intdiv_sql
dpipe_sql
div_sql
overlaps_sql
distance_sql
dot_sql
eq_sql
propertyeq_sql
escape_sql
glob_sql
gt_sql
gte_sql
ilike_sql
ilikeany_sql
is_sql
like_sql
likeany_sql
similarto_sql
lt_sql
lte_sql
mod_sql
mul_sql
neq_sql
nullsafeeq_sql
nullsafeneq_sql
or_sql
slice_sql
sub_sql
trycast_sql
log_sql
use_sql
binary
function_fallback_sql
func
format_args
text_width
format_time
expressions
op_expressions
naked_property
set_operation
tag_sql
token_sql
userdefinedfunction_sql
joinhint_sql
kwarg_sql
when_sql
merge_sql
tochar_sql
dictproperty_sql
dictrange_sql
dictsubproperty_sql
oncluster_sql
clusteredbyproperty_sql
anyvalue_sql
querytransform_sql
indexconstraintoption_sql
indexcolumnconstraint_sql
nvl2_sql
comprehension_sql
columnprefix_sql
opclass_sql
predict_sql
forin_sql
refresh_sql
operator_sql
toarray_sql
tsordstotime_sql
tsordstodate_sql
unixdate_sql
lastday_sql