sqlglot.dialects.spark2
from __future__ import annotations

import typing as t

from sqlglot import exp, parser, transforms
from sqlglot.dialects.dialect import (
    create_with_partitions_sql,
    pivot_column_names,
    rename_func,
    trim_sql,
)
from sqlglot.dialects.hive import Hive
from sqlglot.helper import seq_get


def _create_sql(self: Hive.Generator, e: exp.Create) -> str:
    kind = e.args["kind"]
    properties = e.args.get("properties")

    if kind.upper() == "TABLE" and any(
        isinstance(prop, exp.TemporaryProperty)
        for prop in (properties.expressions if properties else [])
    ):
        return f"CREATE TEMPORARY VIEW {self.sql(e, 'this')} AS {self.sql(e, 'expression')}"
    return create_with_partitions_sql(self, e)


def _map_sql(self: Hive.Generator, expression: exp.Map) -> str:
    keys = self.sql(expression.args["keys"])
    values = self.sql(expression.args["values"])
    return f"MAP_FROM_ARRAYS({keys}, {values})"


def _parse_as_cast(to_type: str) -> t.Callable[[t.List], exp.Expression]:
    return lambda args: exp.Cast(this=seq_get(args, 0), to=exp.DataType.build(to_type))


def _str_to_date(self: Hive.Generator, expression: exp.StrToDate) -> str:
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    if time_format == Hive.DATE_FORMAT:
        return f"TO_DATE({this})"
    return f"TO_DATE({this}, {time_format})"


def _unix_to_time_sql(self: Hive.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = self.sql(expression, "this")
    if scale is None:
        return f"CAST(FROM_UNIXTIME({timestamp}) AS TIMESTAMP)"
    if scale == exp.UnixToTime.SECONDS:
        return f"TIMESTAMP_SECONDS({timestamp})"
    if scale == exp.UnixToTime.MILLIS:
        return f"TIMESTAMP_MILLIS({timestamp})"
    if scale == exp.UnixToTime.MICROS:
        return f"TIMESTAMP_MICROS({timestamp})"

    raise ValueError("Improper scale for timestamp")


def _unalias_pivot(expression: exp.Expression) -> exp.Expression:
    """
    Spark doesn't allow PIVOT aliases, so we need to remove them and possibly wrap a
    pivoted source in a subquery with the same alias to preserve the query's semantics.

    Example:
        >>> from sqlglot import parse_one
        >>> expr = parse_one("SELECT piv.x FROM tbl PIVOT (SUM(a) FOR b IN ('x')) piv")
        >>> print(_unalias_pivot(expr).sql(dialect="spark"))
        SELECT piv.x FROM (SELECT * FROM tbl PIVOT(SUM(a) FOR b IN ('x'))) AS piv
    """
    if isinstance(expression, exp.From) and expression.this.args.get("pivots"):
        pivot = expression.this.args["pivots"][0]
        if pivot.alias:
            alias = pivot.args["alias"].pop()
            return exp.From(
                this=expression.this.replace(
                    exp.select("*").from_(expression.this.copy()).subquery(alias=alias)
                )
            )

    return expression


def _unqualify_pivot_columns(expression: exp.Expression) -> exp.Expression:
    """
    Spark doesn't allow the column referenced in the PIVOT's field to be qualified,
    so we need to unqualify it.

    Example:
        >>> from sqlglot import parse_one
        >>> expr = parse_one("SELECT * FROM tbl PIVOT (SUM(tbl.sales) FOR tbl.quarter IN ('Q1', 'Q2'))")
        >>> print(_unqualify_pivot_columns(expr).sql(dialect="spark"))
        SELECT * FROM tbl PIVOT(SUM(tbl.sales) FOR quarter IN ('Q1', 'Q2'))
    """
    if isinstance(expression, exp.Pivot):
        expression.args["field"].transform(
            lambda node: exp.column(node.output_name, quoted=node.this.quoted)
            if isinstance(node, exp.Column)
            else node,
            copy=False,
        )

    return expression


class Spark2(Hive):
    class Parser(Hive.Parser):
        FUNCTIONS = {
            **Hive.Parser.FUNCTIONS,
            "MAP_FROM_ARRAYS": exp.Map.from_arg_list,
            "TO_UNIX_TIMESTAMP": exp.StrToUnix.from_arg_list,
            "SHIFTLEFT": lambda args: exp.BitwiseLeftShift(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
            ),
            "SHIFTRIGHT": lambda args: exp.BitwiseRightShift(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
            ),
            "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list,
            "IIF": exp.If.from_arg_list,
            "AGGREGATE": exp.Reduce.from_arg_list,
            "DAYOFWEEK": lambda args: exp.DayOfWeek(
                this=exp.TsOrDsToDate(this=seq_get(args, 0)),
            ),
            "DAYOFMONTH": lambda args: exp.DayOfMonth(
                this=exp.TsOrDsToDate(this=seq_get(args, 0)),
            ),
            "DAYOFYEAR": lambda args: exp.DayOfYear(
                this=exp.TsOrDsToDate(this=seq_get(args, 0)),
            ),
            "WEEKOFYEAR": lambda args: exp.WeekOfYear(
                this=exp.TsOrDsToDate(this=seq_get(args, 0)),
            ),
            "DATE_TRUNC": lambda args: exp.TimestampTrunc(
                this=seq_get(args, 1),
                unit=exp.var(seq_get(args, 0)),
            ),
            "TRUNC": lambda args: exp.DateTrunc(unit=seq_get(args, 1), this=seq_get(args, 0)),
            "BOOLEAN": _parse_as_cast("boolean"),
            "DATE": _parse_as_cast("date"),
            "DOUBLE": _parse_as_cast("double"),
            "FLOAT": _parse_as_cast("float"),
            "INT": _parse_as_cast("int"),
            "STRING": _parse_as_cast("string"),
            "TIMESTAMP": _parse_as_cast("timestamp"),
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "BROADCAST": lambda self: self._parse_join_hint("BROADCAST"),
            "BROADCASTJOIN": lambda self: self._parse_join_hint("BROADCASTJOIN"),
            "MAPJOIN": lambda self: self._parse_join_hint("MAPJOIN"),
            "MERGE": lambda self: self._parse_join_hint("MERGE"),
            "SHUFFLEMERGE": lambda self: self._parse_join_hint("SHUFFLEMERGE"),
            "MERGEJOIN": lambda self: self._parse_join_hint("MERGEJOIN"),
            "SHUFFLE_HASH": lambda self: self._parse_join_hint("SHUFFLE_HASH"),
            "SHUFFLE_REPLICATE_NL": lambda self: self._parse_join_hint("SHUFFLE_REPLICATE_NL"),
        }

        def _parse_add_column(self) -> t.Optional[exp.Expression]:
            return self._match_text_seq("ADD", "COLUMNS") and self._parse_schema()

        def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
            return self._match_text_seq("DROP", "COLUMNS") and self.expression(
                exp.Drop, this=self._parse_schema(), kind="COLUMNS"
            )

        def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
            if len(aggregations) == 1:
                return [""]
            return pivot_column_names(aggregations, dialect="spark")

    class Generator(Hive.Generator):
        TYPE_MAPPING = {
            **Hive.Generator.TYPE_MAPPING,
            exp.DataType.Type.TINYINT: "BYTE",
            exp.DataType.Type.SMALLINT: "SHORT",
            exp.DataType.Type.BIGINT: "LONG",
        }

        PROPERTIES_LOCATION = {
            **Hive.Generator.PROPERTIES_LOCATION,
            exp.EngineProperty: exp.Properties.Location.UNSUPPORTED,
            exp.AutoIncrementProperty: exp.Properties.Location.UNSUPPORTED,
            exp.CharacterSetProperty: exp.Properties.Location.UNSUPPORTED,
            exp.CollateProperty: exp.Properties.Location.UNSUPPORTED,
        }

        TRANSFORMS = {
            **Hive.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArraySum: lambda self, e: f"AGGREGATE({self.sql(e, 'this')}, 0, (acc, x) -> acc + x, acc -> acc)",
            exp.AtTimeZone: lambda self, e: f"FROM_UTC_TIMESTAMP({self.sql(e, 'this')}, {self.sql(e, 'zone')})",
            exp.BitwiseLeftShift: rename_func("SHIFTLEFT"),
            exp.BitwiseRightShift: rename_func("SHIFTRIGHT"),
            exp.Create: _create_sql,
            exp.DateFromParts: rename_func("MAKE_DATE"),
            exp.DateTrunc: lambda self, e: self.func("TRUNC", e.this, e.args.get("unit")),
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.FileFormatProperty: lambda self, e: f"USING {e.name.upper()}",
            exp.From: transforms.preprocess([_unalias_pivot]),
            exp.Hint: lambda self, e: f" /*+ {self.expressions(e).strip()} */",
            exp.LogicalAnd: rename_func("BOOL_AND"),
            exp.LogicalOr: rename_func("BOOL_OR"),
            exp.Map: _map_sql,
            exp.Pivot: transforms.preprocess([_unqualify_pivot_columns]),
            exp.Reduce: rename_func("AGGREGATE"),
            exp.StrToDate: _str_to_date,
            exp.StrToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})",
            exp.TimestampTrunc: lambda self, e: self.func(
                "DATE_TRUNC", exp.Literal.string(e.text("unit")), e.this
            ),
            exp.Trim: trim_sql,
            exp.UnixToTime: _unix_to_time_sql,
            exp.VariancePop: rename_func("VAR_POP"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.WithinGroup: transforms.preprocess(
                [transforms.remove_within_group_for_percentiles]
            ),
        }
        TRANSFORMS.pop(exp.ArrayJoin)
        TRANSFORMS.pop(exp.ArraySort)
        TRANSFORMS.pop(exp.ILike)
        TRANSFORMS.pop(exp.Left)
        TRANSFORMS.pop(exp.Right)

        WRAP_DERIVED_VALUES = False
        CREATE_FUNCTION_RETURN_AS = False

        def cast_sql(self, expression: exp.Cast) -> str:
            if isinstance(expression.this, exp.Cast) and expression.this.is_type("json"):
                schema = f"'{self.sql(expression, 'to')}'"
                return self.func("FROM_JSON", expression.this.this, schema)
            if expression.is_type("json"):
                return self.func("TO_JSON", expression.this)

            return super(Hive.Generator, self).cast_sql(expression)

        def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str:
            return super().columndef_sql(
                expression,
                sep=": "
                if isinstance(expression.parent, exp.DataType)
                and expression.parent.is_type("struct")
                else sep,
            )

    class Tokenizer(Hive.Tokenizer):
        HEX_STRINGS = [("X'", "'")]
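The dialect registers itself under the name "spark2", so it is reachable through sqlglot's top-level API. A minimal usage sketch; the outputs in the comments are what the TYPE_MAPPING and _create_sql definitions above imply, not verbatim output from a specific sqlglot version:

import sqlglot

# TYPE_MAPPING renders TINYINT as BYTE when writing Spark 2 SQL.
print(sqlglot.transpile("SELECT CAST(x AS TINYINT) FROM t", write="spark2")[0])
# SELECT CAST(x AS BYTE) FROM t

# _create_sql rewrites a temporary CTAS into a temporary view.
print(sqlglot.transpile("CREATE TEMPORARY TABLE t AS SELECT 1", write="spark2")[0])
# CREATE TEMPORARY VIEW t AS SELECT 1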
class Spark2(sqlglot.dialects.hive.Hive):

class Spark2.Parser(sqlglot.dialects.hive.Hive.Parser):
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
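As an illustrative sketch of the FUNCTIONS table above, Spark's SHIFTLEFT parses into the dialect-agnostic exp.BitwiseLeftShift node rather than an anonymous function:

from sqlglot import exp, parse_one

# SHIFTLEFT is listed in Spark2.Parser.FUNCTIONS, so it maps onto the
# generic bitwise left-shift expression in the syntax tree.
tree = parse_one("SELECT SHIFTLEFT(b, 2) FROM t", read="spark2")
assert tree.find(exp.BitwiseLeftShift) is not None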
class Spark2.Generator(sqlglot.dialects.hive.Hive.Generator):
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether or not to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether or not to normalize identifiers to lowercase. Default: False.
- pad: Determines the pad size in a formatted string. Default: 2.
- indent: Determines the indentation size in a formatted string. Default: 2.
- normalize_functions: Whether or not to normalize all function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Determines whether or not the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether or not to preserve comments in the output SQL code. Default: True
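These options are normally passed through Expression.sql or sqlglot.transpile rather than by constructing a Generator directly; a small sketch:

from sqlglot import parse_one

# pretty=True exercises the pad/indent/max_text_width settings listed above.
print(parse_one("SELECT a, b FROM t WHERE a > 1").sql(dialect="spark2", pretty=True))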
def cast_sql(self, expression: exp.Cast) -> str:
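This override routes JSON casts through Spark's JSON functions. A hedged sketch of the rewrites the code above implies (the exact schema string formatting depends on columndef_sql, which uses ": " inside struct types):

from sqlglot import parse_one

# A cast *to* JSON becomes TO_JSON(...).
print(parse_one("SELECT CAST(a AS JSON)", read="spark2").sql(dialect="spark2"))
# SELECT TO_JSON(a)

# A cast *from* a JSON value to a schema becomes FROM_JSON(..., '<schema>').
print(parse_one("SELECT CAST(CAST(a AS JSON) AS STRUCT<b: INT>)", read="spark2").sql(dialect="spark2"))
# SELECT FROM_JSON(a, 'STRUCT<b: INT>')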
@classmethod
def can_identify(text: str, identify: str | bool = 'safe') -> bool:
@classmethod
def can_identify(cls, text: str, identify: str | bool = "safe") -> bool:
    """Checks if text can be identified given an identify option.

    Args:
        text: The text to check.
        identify:
            "always" or `True`: Always returns true.
            "safe": True if the identifier is case-insensitive.

    Returns:
        Whether or not the given text can be identified.
    """
    if identify is True or identify == "always":
        return True

    if identify == "safe":
        return not cls.case_sensitive(text)

    return False
Checks if text can be identified given an identify option.
Arguments:
- text: The text to check.
- identify: "always" or
True
: Always returns true. "safe": True if the identifier is case-insensitive.
Returns:
Whether or not the given text can be identified.
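A usage sketch; the "safe" results in the comments are assumptions based on the docstring above, on the premise that this Hive-derived dialect treats all-lowercase identifiers as case-insensitive:

from sqlglot.dialects.spark2 import Spark2

# "always" (or True) passes unconditionally.
print(Spark2.Generator.can_identify("foo", identify="always"))  # True

# "safe" defers to case_sensitive(); an all-lowercase name is expected to
# pass while a mixed-case one is expected to fail for this dialect.
print(Spark2.Generator.can_identify("foo", identify="safe"))
print(Spark2.Generator.can_identify("Foo", identify="safe"))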