sqlglot.dialects.redshift
"""Redshift dialect: parsing/generation overrides layered on top of the Postgres dialect."""

from __future__ import annotations

import typing as t

from sqlglot import exp, transforms
from sqlglot.dialects.dialect import (
    NormalizationStrategy,
    concat_to_dpipe_sql,
    concat_ws_to_dpipe_sql,
    date_delta_sql,
    generatedasidentitycolumnconstraint_sql,
    no_tablesample_sql,
    rename_func,
)
from sqlglot.dialects.postgres import Postgres
from sqlglot.helper import seq_get
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E


def _json_sql(self: Redshift.Generator, expression: exp.JSONExtract | exp.JSONExtractScalar) -> str:
    """Render a JSON extraction as Redshift's dotted access syntax, e.g. `col."key"`."""
    return f'{self.sql(expression, "this")}."{expression.expression.name}"'


def _parse_date_delta(expr_type: t.Type[E]) -> t.Callable[[t.List], E]:
    """Build an argument-list parser for DATEADD/DATEDIFF-style functions.

    Redshift's argument order is (unit, expression, this), i.e. args[0] is the
    date part. `TsOrDsAdd` additionally gets a TIMESTAMP return type.
    """

    def _parse_delta(args: t.List) -> E:
        expr = expr_type(this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0))
        if expr_type is exp.TsOrDsAdd:
            expr.set("return_type", exp.DataType.build("TIMESTAMP"))

        return expr

    return _parse_delta


class Redshift(Postgres):
    """Amazon Redshift dialect, derived from Postgres with Redshift-specific overrides."""

    # https://docs.aws.amazon.com/redshift/latest/dg/r_names.html
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    SUPPORTS_USER_DEFINED_TYPES = False
    INDEX_OFFSET = 0

    TIME_FORMAT = "'YYYY-MM-DD HH:MI:SS'"
    TIME_MAPPING = {
        **Postgres.TIME_MAPPING,
        "MON": "%b",
        "HH": "%H",
    }

    class Parser(Postgres.Parser):
        FUNCTIONS = {
            **Postgres.Parser.FUNCTIONS,
            # ADD_MONTHS(date, n) is represented as a month-unit TsOrDsAdd that
            # yields a TIMESTAMP.
            "ADD_MONTHS": lambda args: exp.TsOrDsAdd(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                unit=exp.var("month"),
                return_type=exp.DataType.build("TIMESTAMP"),
            ),
            "DATEADD": _parse_date_delta(exp.TsOrDsAdd),
            "DATE_ADD": _parse_date_delta(exp.TsOrDsAdd),
            "DATEDIFF": _parse_date_delta(exp.TsOrDsDiff),
            "DATE_DIFF": _parse_date_delta(exp.TsOrDsDiff),
            "LISTAGG": exp.GroupConcat.from_arg_list,
            "STRTOL": exp.FromBase.from_arg_list,
        }

        NO_PAREN_FUNCTION_PARSERS = {
            **Postgres.Parser.NO_PAREN_FUNCTION_PARSERS,
            # `APPROXIMATE COUNT(DISTINCT ...)` has no parens after APPROXIMATE
            # itself, so it's handled as a no-paren function.
            "APPROXIMATE": lambda self: self._parse_approximate_count(),
        }

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
        ) -> t.Optional[exp.Expression]:
            # Redshift supports UNPIVOTing SUPER objects, e.g. `UNPIVOT foo.obj[0] AS val AT attr`
            unpivot = self._match(TokenType.UNPIVOT)
            table = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
            )

            # Wrap the table in a Pivot node (unpivot=True) when the UNPIVOT
            # keyword preceded it.
            return self.expression(exp.Pivot, this=table, unpivot=True) if unpivot else table

        def _parse_types(
            self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
        ) -> t.Optional[exp.Expression]:
            this = super()._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )

            # VARCHAR(MAX): the base parser reads MAX as a column reference, so
            # normalize it to a plain variable in the type's expressions.
            if (
                isinstance(this, exp.DataType)
                and this.is_type("varchar")
                and this.expressions
                and this.expressions[0].this == exp.column("MAX")
            ):
                this.set("expressions", [exp.var("MAX")])

            return this

        def _parse_convert(
            self, strict: bool, safe: t.Optional[bool] = None
        ) -> t.Optional[exp.Expression]:
            # Redshift's CONVERT takes the target type first: CONVERT(type, expr).
            # It is represented as a TryCast; `strict` is intentionally unused.
            to = self._parse_types()
            self._match(TokenType.COMMA)
            this = self._parse_bitwise()
            return self.expression(exp.TryCast, this=this, to=to, safe=safe)

        def _parse_approximate_count(self) -> t.Optional[exp.ApproxDistinct]:
            # Remember where APPROXIMATE was seen so we can backtrack if what
            # follows isn't COUNT(DISTINCT ...).
            index = self._index - 1
            func = self._parse_function()

            if isinstance(func, exp.Count) and isinstance(func.this, exp.Distinct):
                return self.expression(exp.ApproxDistinct, this=seq_get(func.this.expressions, 0))
            self._retreat(index)
            return None

        def _parse_query_modifiers(
            self, this: t.Optional[exp.Expression]
        ) -> t.Optional[exp.Expression]:
            this = super()._parse_query_modifiers(this)

            if this:
                # Collect the aliases/names of sources seen so far (FROM clause,
                # then each preceding join); a joined "table" whose first part
                # matches one of them is actually a reference into that source,
                # so it's rewritten as a column.
                refs: t.Set[str] = set()

                for i, join in enumerate(this.args.get("joins", [])):
                    refs.add(
                        (
                            this.args["from"] if i == 0 else this.args["joins"][i - 1]
                        ).alias_or_name.lower()
                    )
                    table = join.this

                    if isinstance(table, exp.Table):
                        if table.parts[0].name.lower() in refs:
                            table.replace(table.to_column())
            return this

    class Tokenizer(Postgres.Tokenizer):
        BIT_STRINGS = []
        HEX_STRINGS = []
        STRING_ESCAPES = ["\\", "'"]

        KEYWORDS = {
            **Postgres.Tokenizer.KEYWORDS,
            "HLLSKETCH": TokenType.HLLSKETCH,
            "SUPER": TokenType.SUPER,
            "SYSDATE": TokenType.CURRENT_TIMESTAMP,
            "TOP": TokenType.TOP,
            "UNLOAD": TokenType.COMMAND,
            "VARBYTE": TokenType.VARBINARY,
        }

        # Redshift allows # to appear as a table identifier prefix
        SINGLE_TOKENS = Postgres.Tokenizer.SINGLE_TOKENS.copy()
        SINGLE_TOKENS.pop("#")

    class Generator(Postgres.Generator):
        LOCKING_READS_SUPPORTED = False
        QUERY_HINTS = False
        VALUES_AS_TABLE = False
        TZ_TO_WITH_TIME_ZONE = True
        NVL2_SUPPORTED = True
        LAST_DAY_SUPPORTS_DATE_PART = False

        TYPE_MAPPING = {
            **Postgres.Generator.TYPE_MAPPING,
            exp.DataType.Type.BINARY: "VARBYTE",
            exp.DataType.Type.INT: "INTEGER",
            exp.DataType.Type.TIMETZ: "TIME",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.VARBINARY: "VARBYTE",
        }

        PROPERTIES_LOCATION = {
            **Postgres.Generator.PROPERTIES_LOCATION,
            exp.LikeProperty: exp.Properties.Location.POST_WITH,
        }

        TRANSFORMS = {
            **Postgres.Generator.TRANSFORMS,
            exp.Concat: concat_to_dpipe_sql,
            exp.ConcatWs: concat_ws_to_dpipe_sql,
            exp.ApproxDistinct: lambda self, e: f"APPROXIMATE COUNT(DISTINCT {self.sql(e, 'this')})",
            exp.CurrentTimestamp: lambda self, e: "SYSDATE",
            exp.DateAdd: date_delta_sql("DATEADD"),
            exp.DateDiff: date_delta_sql("DATEDIFF"),
            exp.DistKeyProperty: lambda self, e: f"DISTKEY({e.name})",
            exp.DistStyleProperty: lambda self, e: self.naked_property(e),
            exp.FromBase: rename_func("STRTOL"),
            exp.GeneratedAsIdentityColumnConstraint: generatedasidentitycolumnconstraint_sql,
            exp.JSONExtract: _json_sql,
            exp.JSONExtractScalar: _json_sql,
            exp.GroupConcat: rename_func("LISTAGG"),
            exp.ParseJSON: rename_func("JSON_PARSE"),
            exp.Select: transforms.preprocess(
                [transforms.eliminate_distinct_on, transforms.eliminate_semi_and_anti_joins]
            ),
            exp.SortKeyProperty: lambda self, e: f"{'COMPOUND ' if e.args['compound'] else ''}SORTKEY({self.format_args(*e.this)})",
            exp.TableSample: no_tablesample_sql,
            exp.TsOrDsAdd: date_delta_sql("DATEADD"),
            exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
        }

        # Postgres maps exp.Pivot to no_pivot_sql, but Redshift supports pivots
        TRANSFORMS.pop(exp.Pivot)

        # Redshift uses the POW | POWER (expr1, expr2) syntax instead of expr1 ^ expr2 (postgres)
        TRANSFORMS.pop(exp.Pow)

        # Redshift supports ANY_VALUE(..)
        TRANSFORMS.pop(exp.AnyValue)

        # Redshift supports LAST_DAY(..)
        TRANSFORMS.pop(exp.LastDay)

        RESERVED_KEYWORDS = {*Postgres.Generator.RESERVED_KEYWORDS, "snapshot", "type"}

        def with_properties(self, properties: exp.Properties) -> str:
            """Redshift doesn't have `WITH` as part of their with_properties so we remove it"""
            return self.properties(properties, prefix=" ", suffix="")

        def datatype_sql(self, expression: exp.DataType) -> str:
            """
            Redshift converts the `TEXT` data type to `VARCHAR(255)` by default when people more generally mean
            VARCHAR of max length which is `VARCHAR(max)` in Redshift. Therefore if we get a `TEXT` data type
            without precision we convert it to `VARCHAR(max)` and if it does have precision then we just convert
            `TEXT` to `VARCHAR`.
            """
            if expression.is_type("text"):
                expression.set("this", exp.DataType.Type.VARCHAR)
                precision = expression.args.get("expressions")

                # No precision supplied: default to VARCHAR(MAX) rather than
                # Redshift's implicit VARCHAR(255).
                if not precision:
                    expression.append("expressions", exp.var("MAX"))

            return super().datatype_sql(expression)
39class Redshift(Postgres): 40 # https://docs.aws.amazon.com/redshift/latest/dg/r_names.html 41 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE 42 43 SUPPORTS_USER_DEFINED_TYPES = False 44 INDEX_OFFSET = 0 45 46 TIME_FORMAT = "'YYYY-MM-DD HH:MI:SS'" 47 TIME_MAPPING = { 48 **Postgres.TIME_MAPPING, 49 "MON": "%b", 50 "HH": "%H", 51 } 52 53 class Parser(Postgres.Parser): 54 FUNCTIONS = { 55 **Postgres.Parser.FUNCTIONS, 56 "ADD_MONTHS": lambda args: exp.TsOrDsAdd( 57 this=seq_get(args, 0), 58 expression=seq_get(args, 1), 59 unit=exp.var("month"), 60 return_type=exp.DataType.build("TIMESTAMP"), 61 ), 62 "DATEADD": _parse_date_delta(exp.TsOrDsAdd), 63 "DATE_ADD": _parse_date_delta(exp.TsOrDsAdd), 64 "DATEDIFF": _parse_date_delta(exp.TsOrDsDiff), 65 "DATE_DIFF": _parse_date_delta(exp.TsOrDsDiff), 66 "LISTAGG": exp.GroupConcat.from_arg_list, 67 "STRTOL": exp.FromBase.from_arg_list, 68 } 69 70 NO_PAREN_FUNCTION_PARSERS = { 71 **Postgres.Parser.NO_PAREN_FUNCTION_PARSERS, 72 "APPROXIMATE": lambda self: self._parse_approximate_count(), 73 } 74 75 def _parse_table( 76 self, 77 schema: bool = False, 78 joins: bool = False, 79 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 80 parse_bracket: bool = False, 81 ) -> t.Optional[exp.Expression]: 82 # Redshift supports UNPIVOTing SUPER objects, e.g. 
`UNPIVOT foo.obj[0] AS val AT attr` 83 unpivot = self._match(TokenType.UNPIVOT) 84 table = super()._parse_table( 85 schema=schema, 86 joins=joins, 87 alias_tokens=alias_tokens, 88 parse_bracket=parse_bracket, 89 ) 90 91 return self.expression(exp.Pivot, this=table, unpivot=True) if unpivot else table 92 93 def _parse_types( 94 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 95 ) -> t.Optional[exp.Expression]: 96 this = super()._parse_types( 97 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 98 ) 99 100 if ( 101 isinstance(this, exp.DataType) 102 and this.is_type("varchar") 103 and this.expressions 104 and this.expressions[0].this == exp.column("MAX") 105 ): 106 this.set("expressions", [exp.var("MAX")]) 107 108 return this 109 110 def _parse_convert( 111 self, strict: bool, safe: t.Optional[bool] = None 112 ) -> t.Optional[exp.Expression]: 113 to = self._parse_types() 114 self._match(TokenType.COMMA) 115 this = self._parse_bitwise() 116 return self.expression(exp.TryCast, this=this, to=to, safe=safe) 117 118 def _parse_approximate_count(self) -> t.Optional[exp.ApproxDistinct]: 119 index = self._index - 1 120 func = self._parse_function() 121 122 if isinstance(func, exp.Count) and isinstance(func.this, exp.Distinct): 123 return self.expression(exp.ApproxDistinct, this=seq_get(func.this.expressions, 0)) 124 self._retreat(index) 125 return None 126 127 def _parse_query_modifiers( 128 self, this: t.Optional[exp.Expression] 129 ) -> t.Optional[exp.Expression]: 130 this = super()._parse_query_modifiers(this) 131 132 if this: 133 refs = set() 134 135 for i, join in enumerate(this.args.get("joins", [])): 136 refs.add( 137 ( 138 this.args["from"] if i == 0 else this.args["joins"][i - 1] 139 ).alias_or_name.lower() 140 ) 141 table = join.this 142 143 if isinstance(table, exp.Table): 144 if table.parts[0].name.lower() in refs: 145 table.replace(table.to_column()) 146 return this 147 148 class 
Tokenizer(Postgres.Tokenizer): 149 BIT_STRINGS = [] 150 HEX_STRINGS = [] 151 STRING_ESCAPES = ["\\", "'"] 152 153 KEYWORDS = { 154 **Postgres.Tokenizer.KEYWORDS, 155 "HLLSKETCH": TokenType.HLLSKETCH, 156 "SUPER": TokenType.SUPER, 157 "SYSDATE": TokenType.CURRENT_TIMESTAMP, 158 "TOP": TokenType.TOP, 159 "UNLOAD": TokenType.COMMAND, 160 "VARBYTE": TokenType.VARBINARY, 161 } 162 163 # Redshift allows # to appear as a table identifier prefix 164 SINGLE_TOKENS = Postgres.Tokenizer.SINGLE_TOKENS.copy() 165 SINGLE_TOKENS.pop("#") 166 167 class Generator(Postgres.Generator): 168 LOCKING_READS_SUPPORTED = False 169 QUERY_HINTS = False 170 VALUES_AS_TABLE = False 171 TZ_TO_WITH_TIME_ZONE = True 172 NVL2_SUPPORTED = True 173 LAST_DAY_SUPPORTS_DATE_PART = False 174 175 TYPE_MAPPING = { 176 **Postgres.Generator.TYPE_MAPPING, 177 exp.DataType.Type.BINARY: "VARBYTE", 178 exp.DataType.Type.INT: "INTEGER", 179 exp.DataType.Type.TIMETZ: "TIME", 180 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 181 exp.DataType.Type.VARBINARY: "VARBYTE", 182 } 183 184 PROPERTIES_LOCATION = { 185 **Postgres.Generator.PROPERTIES_LOCATION, 186 exp.LikeProperty: exp.Properties.Location.POST_WITH, 187 } 188 189 TRANSFORMS = { 190 **Postgres.Generator.TRANSFORMS, 191 exp.Concat: concat_to_dpipe_sql, 192 exp.ConcatWs: concat_ws_to_dpipe_sql, 193 exp.ApproxDistinct: lambda self, e: f"APPROXIMATE COUNT(DISTINCT {self.sql(e, 'this')})", 194 exp.CurrentTimestamp: lambda self, e: "SYSDATE", 195 exp.DateAdd: date_delta_sql("DATEADD"), 196 exp.DateDiff: date_delta_sql("DATEDIFF"), 197 exp.DistKeyProperty: lambda self, e: f"DISTKEY({e.name})", 198 exp.DistStyleProperty: lambda self, e: self.naked_property(e), 199 exp.FromBase: rename_func("STRTOL"), 200 exp.GeneratedAsIdentityColumnConstraint: generatedasidentitycolumnconstraint_sql, 201 exp.JSONExtract: _json_sql, 202 exp.JSONExtractScalar: _json_sql, 203 exp.GroupConcat: rename_func("LISTAGG"), 204 exp.ParseJSON: rename_func("JSON_PARSE"), 205 exp.Select: 
transforms.preprocess( 206 [transforms.eliminate_distinct_on, transforms.eliminate_semi_and_anti_joins] 207 ), 208 exp.SortKeyProperty: lambda self, e: f"{'COMPOUND ' if e.args['compound'] else ''}SORTKEY({self.format_args(*e.this)})", 209 exp.TableSample: no_tablesample_sql, 210 exp.TsOrDsAdd: date_delta_sql("DATEADD"), 211 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 212 } 213 214 # Postgres maps exp.Pivot to no_pivot_sql, but Redshift support pivots 215 TRANSFORMS.pop(exp.Pivot) 216 217 # Redshift uses the POW | POWER (expr1, expr2) syntax instead of expr1 ^ expr2 (postgres) 218 TRANSFORMS.pop(exp.Pow) 219 220 # Redshift supports ANY_VALUE(..) 221 TRANSFORMS.pop(exp.AnyValue) 222 223 # Redshift supports LAST_DAY(..) 224 TRANSFORMS.pop(exp.LastDay) 225 226 RESERVED_KEYWORDS = {*Postgres.Generator.RESERVED_KEYWORDS, "snapshot", "type"} 227 228 def with_properties(self, properties: exp.Properties) -> str: 229 """Redshift doesn't have `WITH` as part of their with_properties so we remove it""" 230 return self.properties(properties, prefix=" ", suffix="") 231 232 def datatype_sql(self, expression: exp.DataType) -> str: 233 """ 234 Redshift converts the `TEXT` data type to `VARCHAR(255)` by default when people more generally mean 235 VARCHAR of max length which is `VARCHAR(max)` in Redshift. Therefore if we get a `TEXT` data type 236 without precision we convert it to `VARCHAR(max)` and if it does have precision then we just convert 237 `TEXT` to `VARCHAR`. 238 """ 239 if expression.is_type("text"): 240 expression.set("this", exp.DataType.Type.VARCHAR) 241 precision = expression.args.get("expressions") 242 243 if not precision: 244 expression.append("expressions", exp.var("MAX")) 245 246 return super().datatype_sql(expression)
Specifies the strategy according to which identifiers should be normalized.
Determines whether or not user-defined data types are supported.
Associates this dialect's time formats with their equivalent Python strftime
format.
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_SEMI_ANTI_JOIN
- NORMALIZE_FUNCTIONS
- LOG_BASE_FIRST
- SAFE_DIVISION
- DATE_FORMAT
- DATEINT_FORMAT
- FORMAT_MAPPING
- ESCAPE_SEQUENCES
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- get_or_raise
- format_time
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
53 class Parser(Postgres.Parser): 54 FUNCTIONS = { 55 **Postgres.Parser.FUNCTIONS, 56 "ADD_MONTHS": lambda args: exp.TsOrDsAdd( 57 this=seq_get(args, 0), 58 expression=seq_get(args, 1), 59 unit=exp.var("month"), 60 return_type=exp.DataType.build("TIMESTAMP"), 61 ), 62 "DATEADD": _parse_date_delta(exp.TsOrDsAdd), 63 "DATE_ADD": _parse_date_delta(exp.TsOrDsAdd), 64 "DATEDIFF": _parse_date_delta(exp.TsOrDsDiff), 65 "DATE_DIFF": _parse_date_delta(exp.TsOrDsDiff), 66 "LISTAGG": exp.GroupConcat.from_arg_list, 67 "STRTOL": exp.FromBase.from_arg_list, 68 } 69 70 NO_PAREN_FUNCTION_PARSERS = { 71 **Postgres.Parser.NO_PAREN_FUNCTION_PARSERS, 72 "APPROXIMATE": lambda self: self._parse_approximate_count(), 73 } 74 75 def _parse_table( 76 self, 77 schema: bool = False, 78 joins: bool = False, 79 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 80 parse_bracket: bool = False, 81 ) -> t.Optional[exp.Expression]: 82 # Redshift supports UNPIVOTing SUPER objects, e.g. `UNPIVOT foo.obj[0] AS val AT attr` 83 unpivot = self._match(TokenType.UNPIVOT) 84 table = super()._parse_table( 85 schema=schema, 86 joins=joins, 87 alias_tokens=alias_tokens, 88 parse_bracket=parse_bracket, 89 ) 90 91 return self.expression(exp.Pivot, this=table, unpivot=True) if unpivot else table 92 93 def _parse_types( 94 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 95 ) -> t.Optional[exp.Expression]: 96 this = super()._parse_types( 97 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 98 ) 99 100 if ( 101 isinstance(this, exp.DataType) 102 and this.is_type("varchar") 103 and this.expressions 104 and this.expressions[0].this == exp.column("MAX") 105 ): 106 this.set("expressions", [exp.var("MAX")]) 107 108 return this 109 110 def _parse_convert( 111 self, strict: bool, safe: t.Optional[bool] = None 112 ) -> t.Optional[exp.Expression]: 113 to = self._parse_types() 114 self._match(TokenType.COMMA) 115 this = self._parse_bitwise() 116 return 
self.expression(exp.TryCast, this=this, to=to, safe=safe) 117 118 def _parse_approximate_count(self) -> t.Optional[exp.ApproxDistinct]: 119 index = self._index - 1 120 func = self._parse_function() 121 122 if isinstance(func, exp.Count) and isinstance(func.this, exp.Distinct): 123 return self.expression(exp.ApproxDistinct, this=seq_get(func.this.expressions, 0)) 124 self._retreat(index) 125 return None 126 127 def _parse_query_modifiers( 128 self, this: t.Optional[exp.Expression] 129 ) -> t.Optional[exp.Expression]: 130 this = super()._parse_query_modifiers(this) 131 132 if this: 133 refs = set() 134 135 for i, join in enumerate(this.args.get("joins", [])): 136 refs.add( 137 ( 138 this.args["from"] if i == 0 else this.args["joins"][i - 1] 139 ).alias_or_name.lower() 140 ) 141 table = join.this 142 143 if isinstance(table, exp.Table): 144 if table.parts[0].name.lower() in refs: 145 table.replace(table.to_column()) 146 return this
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ID_VAR_TOKENS
- INTERVAL_VARS
- TABLE_ALIAS_TOKENS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- EQUALITY
- COMPARISON
- TERM
- FACTOR
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- PROPERTY_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- MODIFIABLES
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_UNION
- UNION_MODIFIERS
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
148 class Tokenizer(Postgres.Tokenizer): 149 BIT_STRINGS = [] 150 HEX_STRINGS = [] 151 STRING_ESCAPES = ["\\", "'"] 152 153 KEYWORDS = { 154 **Postgres.Tokenizer.KEYWORDS, 155 "HLLSKETCH": TokenType.HLLSKETCH, 156 "SUPER": TokenType.SUPER, 157 "SYSDATE": TokenType.CURRENT_TIMESTAMP, 158 "TOP": TokenType.TOP, 159 "UNLOAD": TokenType.COMMAND, 160 "VARBYTE": TokenType.VARBINARY, 161 } 162 163 # Redshift allows # to appear as a table identifier prefix 164 SINGLE_TOKENS = Postgres.Tokenizer.SINGLE_TOKENS.copy() 165 SINGLE_TOKENS.pop("#")
Inherited Members
167 class Generator(Postgres.Generator): 168 LOCKING_READS_SUPPORTED = False 169 QUERY_HINTS = False 170 VALUES_AS_TABLE = False 171 TZ_TO_WITH_TIME_ZONE = True 172 NVL2_SUPPORTED = True 173 LAST_DAY_SUPPORTS_DATE_PART = False 174 175 TYPE_MAPPING = { 176 **Postgres.Generator.TYPE_MAPPING, 177 exp.DataType.Type.BINARY: "VARBYTE", 178 exp.DataType.Type.INT: "INTEGER", 179 exp.DataType.Type.TIMETZ: "TIME", 180 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 181 exp.DataType.Type.VARBINARY: "VARBYTE", 182 } 183 184 PROPERTIES_LOCATION = { 185 **Postgres.Generator.PROPERTIES_LOCATION, 186 exp.LikeProperty: exp.Properties.Location.POST_WITH, 187 } 188 189 TRANSFORMS = { 190 **Postgres.Generator.TRANSFORMS, 191 exp.Concat: concat_to_dpipe_sql, 192 exp.ConcatWs: concat_ws_to_dpipe_sql, 193 exp.ApproxDistinct: lambda self, e: f"APPROXIMATE COUNT(DISTINCT {self.sql(e, 'this')})", 194 exp.CurrentTimestamp: lambda self, e: "SYSDATE", 195 exp.DateAdd: date_delta_sql("DATEADD"), 196 exp.DateDiff: date_delta_sql("DATEDIFF"), 197 exp.DistKeyProperty: lambda self, e: f"DISTKEY({e.name})", 198 exp.DistStyleProperty: lambda self, e: self.naked_property(e), 199 exp.FromBase: rename_func("STRTOL"), 200 exp.GeneratedAsIdentityColumnConstraint: generatedasidentitycolumnconstraint_sql, 201 exp.JSONExtract: _json_sql, 202 exp.JSONExtractScalar: _json_sql, 203 exp.GroupConcat: rename_func("LISTAGG"), 204 exp.ParseJSON: rename_func("JSON_PARSE"), 205 exp.Select: transforms.preprocess( 206 [transforms.eliminate_distinct_on, transforms.eliminate_semi_and_anti_joins] 207 ), 208 exp.SortKeyProperty: lambda self, e: f"{'COMPOUND ' if e.args['compound'] else ''}SORTKEY({self.format_args(*e.this)})", 209 exp.TableSample: no_tablesample_sql, 210 exp.TsOrDsAdd: date_delta_sql("DATEADD"), 211 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 212 } 213 214 # Postgres maps exp.Pivot to no_pivot_sql, but Redshift support pivots 215 TRANSFORMS.pop(exp.Pivot) 216 217 # Redshift uses the POW | POWER (expr1, expr2) 
syntax instead of expr1 ^ expr2 (postgres) 218 TRANSFORMS.pop(exp.Pow) 219 220 # Redshift supports ANY_VALUE(..) 221 TRANSFORMS.pop(exp.AnyValue) 222 223 # Redshift supports LAST_DAY(..) 224 TRANSFORMS.pop(exp.LastDay) 225 226 RESERVED_KEYWORDS = {*Postgres.Generator.RESERVED_KEYWORDS, "snapshot", "type"} 227 228 def with_properties(self, properties: exp.Properties) -> str: 229 """Redshift doesn't have `WITH` as part of their with_properties so we remove it""" 230 return self.properties(properties, prefix=" ", suffix="") 231 232 def datatype_sql(self, expression: exp.DataType) -> str: 233 """ 234 Redshift converts the `TEXT` data type to `VARCHAR(255)` by default when people more generally mean 235 VARCHAR of max length which is `VARCHAR(max)` in Redshift. Therefore if we get a `TEXT` data type 236 without precision we convert it to `VARCHAR(max)` and if it does have precision then we just convert 237 `TEXT` to `VARCHAR`. 238 """ 239 if expression.is_type("text"): 240 expression.set("this", exp.DataType.Type.VARCHAR) 241 precision = expression.args.get("expressions") 242 243 if not precision: 244 expression.append("expressions", exp.var("MAX")) 245 246 return super().datatype_sql(expression)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether or not to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether or not to normalize identifiers to lowercase. Default: False.
- pad: Determines the pad size in a formatted string. Default: 2.
- indent: Determines the indentation size in a formatted string. Default: 2.
- normalize_functions: Whether or not to normalize all function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Determines whether or not the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether or not to preserve comments in the output SQL code. Default: True
228 def with_properties(self, properties: exp.Properties) -> str: 229 """Redshift doesn't have `WITH` as part of their with_properties so we remove it""" 230 return self.properties(properties, prefix=" ", suffix="")
Redshift doesn't have `WITH` as part of its with_properties, so we remove it.
232 def datatype_sql(self, expression: exp.DataType) -> str: 233 """ 234 Redshift converts the `TEXT` data type to `VARCHAR(255)` by default when people more generally mean 235 VARCHAR of max length which is `VARCHAR(max)` in Redshift. Therefore if we get a `TEXT` data type 236 without precision we convert it to `VARCHAR(max)` and if it does have precision then we just convert 237 `TEXT` to `VARCHAR`. 238 """ 239 if expression.is_type("text"): 240 expression.set("this", exp.DataType.Type.VARCHAR) 241 precision = expression.args.get("expressions") 242 243 if not precision: 244 expression.append("expressions", exp.var("MAX")) 245 246 return super().datatype_sql(expression)
Redshift converts the `TEXT` data type to `VARCHAR(255)` by default when people more generally mean
VARCHAR of max length, which is `VARCHAR(max)` in Redshift. Therefore, if we get a `TEXT` data type
without precision, we convert it to `VARCHAR(max)`, and if it does have precision, then we just convert
`TEXT` to `VARCHAR`.
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- EXPLICIT_UNION
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- LIMIT_ONLY_LITERALS
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- COLUMN_JOIN_MARKS_SUPPORTED
- EXTRACT_ALLOWS_QUOTES
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- STAR_MAPPING
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- KEY_VALUE_DEFINITIONS
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- clone_sql
- describe_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- except_op
- fetch_sql
- filter_sql
- hint_sql
- index_sql
- identifier_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- intersect_op
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognize_sql
- query_modifiers
- offset_limit_modifiers
- after_having_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- union_sql
- union_op
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- jsonkeyvalue_sql
- formatjson_sql
- jsonobject_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- add_sql
- and_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- renametable_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- or_sql
- slice_sql
- sub_sql
- trycast_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- set_operation
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql