sqlglot.dialects.redshift
from __future__ import annotations

import typing as t

from sqlglot import exp, transforms
from sqlglot.dialects.dialect import (
    NormalizationStrategy,
    concat_to_dpipe_sql,
    concat_ws_to_dpipe_sql,
    date_delta_sql,
    generatedasidentitycolumnconstraint_sql,
    json_path_segments,
    no_tablesample_sql,
    parse_json_extract_path,
    rename_func,
)
from sqlglot.dialects.postgres import Postgres
from sqlglot.helper import seq_get
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E


def _json_extract_sql(
    self: Redshift.Generator, expression: exp.JSONExtract | exp.JSONExtractScalar
) -> str:
    """Render a JSON extraction as a Redshift JSON_EXTRACT_PATH_TEXT call."""
    return self.func(
        "JSON_EXTRACT_PATH_TEXT",
        expression.this,
        *json_path_segments(self, expression.expression),
        expression.args.get("null_if_invalid"),
    )


def _parse_date_delta(expr_type: t.Type[E]) -> t.Callable[[t.List], E]:
    """Build a parser for DATEADD/DATEDIFF-style calls, whose args are (unit, expression, this)."""

    def _parse_delta(args: t.List) -> E:
        # The SQL argument order (unit, expression, this) is the reverse of the
        # expression's keyword order, hence the reversed seq_get indices.
        expr = expr_type(this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0))
        if expr_type is exp.TsOrDsAdd:
            expr.set("return_type", exp.DataType.build("TIMESTAMP"))

        return expr

    return _parse_delta


class Redshift(Postgres):
    """Amazon Redshift dialect, derived from Postgres."""

    # https://docs.aws.amazon.com/redshift/latest/dg/r_names.html
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    SUPPORTS_USER_DEFINED_TYPES = False
    INDEX_OFFSET = 0

    TIME_FORMAT = "'YYYY-MM-DD HH:MI:SS'"
    TIME_MAPPING = {
        **Postgres.TIME_MAPPING,
        "MON": "%b",
        "HH": "%H",
    }

    class Parser(Postgres.Parser):
        FUNCTIONS = {
            **Postgres.Parser.FUNCTIONS,
            "ADD_MONTHS": lambda args: exp.TsOrDsAdd(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                unit=exp.var("month"),
                return_type=exp.DataType.build("TIMESTAMP"),
            ),
            "DATEADD": _parse_date_delta(exp.TsOrDsAdd),
            "DATE_ADD": _parse_date_delta(exp.TsOrDsAdd),
            "DATEDIFF": _parse_date_delta(exp.TsOrDsDiff),
            "DATE_DIFF": _parse_date_delta(exp.TsOrDsDiff),
            "GETDATE": exp.CurrentTimestamp.from_arg_list,
            "JSON_EXTRACT_PATH_TEXT": parse_json_extract_path(
                exp.JSONExtractScalar, supports_null_if_invalid=True
            ),
            "LISTAGG": exp.GroupConcat.from_arg_list,
            "STRTOL": exp.FromBase.from_arg_list,
        }

        NO_PAREN_FUNCTION_PARSERS = {
            **Postgres.Parser.NO_PAREN_FUNCTION_PARSERS,
            "APPROXIMATE": lambda self: self._parse_approximate_count(),
            "SYSDATE": lambda self: self.expression(exp.CurrentTimestamp, transaction=True),
        }

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
        ) -> t.Optional[exp.Expression]:
            # Redshift supports UNPIVOTing SUPER objects, e.g. `UNPIVOT foo.obj[0] AS val AT attr`
            unpivot = self._match(TokenType.UNPIVOT)
            table = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
            )

            return self.expression(exp.Pivot, this=table, unpivot=True) if unpivot else table

        def _parse_types(
            self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
        ) -> t.Optional[exp.Expression]:
            this = super()._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )

            # VARCHAR(MAX): the MAX is parsed as a column reference, so normalize it to a var.
            if (
                isinstance(this, exp.DataType)
                and this.is_type("varchar")
                and this.expressions
                and this.expressions[0].this == exp.column("MAX")
            ):
                this.set("expressions", [exp.var("MAX")])

            return this

        def _parse_convert(
            self, strict: bool, safe: t.Optional[bool] = None
        ) -> t.Optional[exp.Expression]:
            # CONVERT(type, expr): the target type comes first; parsed into a TryCast.
            to = self._parse_types()
            self._match(TokenType.COMMA)
            this = self._parse_bitwise()
            return self.expression(exp.TryCast, this=this, to=to, safe=safe)

        def _parse_approximate_count(self) -> t.Optional[exp.ApproxDistinct]:
            # After APPROXIMATE we expect COUNT(DISTINCT ...); otherwise backtrack.
            index = self._index - 1
            func = self._parse_function()

            if isinstance(func, exp.Count) and isinstance(func.this, exp.Distinct):
                return self.expression(exp.ApproxDistinct, this=seq_get(func.this.expressions, 0))
            self._retreat(index)
            return None

        def _parse_query_modifiers(
            self, this: t.Optional[exp.Expression]
        ) -> t.Optional[exp.Expression]:
            this = super()._parse_query_modifiers(this)

            if this:
                refs = set()

                for i, join in enumerate(this.args.get("joins", [])):
                    refs.add(
                        (
                            this.args["from"] if i == 0 else this.args["joins"][i - 1]
                        ).alias_or_name.lower()
                    )
                    table = join.this

                    # A join target whose first name part matches a preceding FROM/JOIN
                    # alias is really a column reference, so rewrite it as one.
                    if isinstance(table, exp.Table):
                        if table.parts[0].name.lower() in refs:
                            table.replace(table.to_column())
            return this

    class Tokenizer(Postgres.Tokenizer):
        BIT_STRINGS = []
        HEX_STRINGS = []
        STRING_ESCAPES = ["\\", "'"]

        KEYWORDS = {
            **Postgres.Tokenizer.KEYWORDS,
            "HLLSKETCH": TokenType.HLLSKETCH,
            "SUPER": TokenType.SUPER,
            "TOP": TokenType.TOP,
            "UNLOAD": TokenType.COMMAND,
            "VARBYTE": TokenType.VARBINARY,
        }

        # Redshift allows # to appear as a table identifier prefix
        SINGLE_TOKENS = Postgres.Tokenizer.SINGLE_TOKENS.copy()
        SINGLE_TOKENS.pop("#")

    class Generator(Postgres.Generator):
        LOCKING_READS_SUPPORTED = False
        QUERY_HINTS = False
        VALUES_AS_TABLE = False
        TZ_TO_WITH_TIME_ZONE = True
        NVL2_SUPPORTED = True
        LAST_DAY_SUPPORTS_DATE_PART = False

        TYPE_MAPPING = {
            **Postgres.Generator.TYPE_MAPPING,
            exp.DataType.Type.BINARY: "VARBYTE",
            exp.DataType.Type.INT: "INTEGER",
            exp.DataType.Type.TIMETZ: "TIME",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.VARBINARY: "VARBYTE",
        }

        TRANSFORMS = {
            **Postgres.Generator.TRANSFORMS,
            exp.Concat: concat_to_dpipe_sql,
            exp.ConcatWs: concat_ws_to_dpipe_sql,
            exp.ApproxDistinct: lambda self,
            e: f"APPROXIMATE COUNT(DISTINCT {self.sql(e, 'this')})",
            exp.CurrentTimestamp: lambda self, e: (
                "SYSDATE" if e.args.get("transaction") else "GETDATE()"
            ),
            exp.DateAdd: date_delta_sql("DATEADD"),
            exp.DateDiff: date_delta_sql("DATEDIFF"),
            exp.DistKeyProperty: lambda self, e: f"DISTKEY({e.name})",
            exp.DistStyleProperty: lambda self, e: self.naked_property(e),
            exp.FromBase: rename_func("STRTOL"),
            exp.GeneratedAsIdentityColumnConstraint: generatedasidentitycolumnconstraint_sql,
            exp.JSONExtract: _json_extract_sql,
            exp.JSONExtractScalar: _json_extract_sql,
            exp.GroupConcat: rename_func("LISTAGG"),
            exp.ParseJSON: rename_func("JSON_PARSE"),
            exp.Select: transforms.preprocess(
                [transforms.eliminate_distinct_on, transforms.eliminate_semi_and_anti_joins]
            ),
            exp.SortKeyProperty: lambda self,
            e: f"{'COMPOUND ' if e.args['compound'] else ''}SORTKEY({self.format_args(*e.this)})",
            exp.TableSample: no_tablesample_sql,
            exp.TsOrDsAdd: date_delta_sql("DATEADD"),
            exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
        }

        # Postgres maps exp.Pivot to no_pivot_sql, but Redshift supports pivots
        TRANSFORMS.pop(exp.Pivot)

        # Redshift uses the POW | POWER (expr1, expr2) syntax instead of expr1 ^ expr2 (postgres)
        TRANSFORMS.pop(exp.Pow)

        # Redshift supports ANY_VALUE(..)
        TRANSFORMS.pop(exp.AnyValue)

        # Redshift supports LAST_DAY(..)
        TRANSFORMS.pop(exp.LastDay)

        RESERVED_KEYWORDS = {*Postgres.Generator.RESERVED_KEYWORDS, "snapshot", "type"}

        def with_properties(self, properties: exp.Properties) -> str:
            """Redshift doesn't have `WITH` as part of its properties clause, so we remove it."""
            return self.properties(properties, prefix=" ", suffix="")

        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
            if expression.is_type(exp.DataType.Type.JSON):
                # Redshift doesn't support a JSON type, so casting to it is treated as a noop
                return self.sql(expression, "this")

            return super().cast_sql(expression, safe_prefix=safe_prefix)

        def datatype_sql(self, expression: exp.DataType) -> str:
            """
            Redshift converts the `TEXT` data type to `VARCHAR(255)` by default when people more generally mean
            VARCHAR of max length which is `VARCHAR(max)` in Redshift. Therefore if we get a `TEXT` data type
            without precision we convert it to `VARCHAR(max)` and if it does have precision then we just convert
            `TEXT` to `VARCHAR`.
            """
            if expression.is_type("text"):
                expression.set("this", exp.DataType.Type.VARCHAR)
                precision = expression.args.get("expressions")

                if not precision:
                    expression.append("expressions", exp.var("MAX"))

            return super().datatype_sql(expression)
class Redshift(Postgres):
    """Amazon Redshift dialect, derived from Postgres."""

    # https://docs.aws.amazon.com/redshift/latest/dg/r_names.html
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    SUPPORTS_USER_DEFINED_TYPES = False
    INDEX_OFFSET = 0

    TIME_FORMAT = "'YYYY-MM-DD HH:MI:SS'"
    TIME_MAPPING = {
        **Postgres.TIME_MAPPING,
        "MON": "%b",
        "HH": "%H",
    }

    class Parser(Postgres.Parser):
        FUNCTIONS = {
            **Postgres.Parser.FUNCTIONS,
            "ADD_MONTHS": lambda args: exp.TsOrDsAdd(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                unit=exp.var("month"),
                return_type=exp.DataType.build("TIMESTAMP"),
            ),
            "DATEADD": _parse_date_delta(exp.TsOrDsAdd),
            "DATE_ADD": _parse_date_delta(exp.TsOrDsAdd),
            "DATEDIFF": _parse_date_delta(exp.TsOrDsDiff),
            "DATE_DIFF": _parse_date_delta(exp.TsOrDsDiff),
            "GETDATE": exp.CurrentTimestamp.from_arg_list,
            "JSON_EXTRACT_PATH_TEXT": parse_json_extract_path(
                exp.JSONExtractScalar, supports_null_if_invalid=True
            ),
            "LISTAGG": exp.GroupConcat.from_arg_list,
            "STRTOL": exp.FromBase.from_arg_list,
        }

        NO_PAREN_FUNCTION_PARSERS = {
            **Postgres.Parser.NO_PAREN_FUNCTION_PARSERS,
            "APPROXIMATE": lambda self: self._parse_approximate_count(),
            "SYSDATE": lambda self: self.expression(exp.CurrentTimestamp, transaction=True),
        }

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
        ) -> t.Optional[exp.Expression]:
            # Redshift supports UNPIVOTing SUPER objects, e.g. `UNPIVOT foo.obj[0] AS val AT attr`
            unpivot = self._match(TokenType.UNPIVOT)
            table = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
            )

            return self.expression(exp.Pivot, this=table, unpivot=True) if unpivot else table

        def _parse_types(
            self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
        ) -> t.Optional[exp.Expression]:
            this = super()._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )

            # VARCHAR(MAX): the MAX is parsed as a column reference, so normalize it to a var.
            if (
                isinstance(this, exp.DataType)
                and this.is_type("varchar")
                and this.expressions
                and this.expressions[0].this == exp.column("MAX")
            ):
                this.set("expressions", [exp.var("MAX")])

            return this

        def _parse_convert(
            self, strict: bool, safe: t.Optional[bool] = None
        ) -> t.Optional[exp.Expression]:
            # CONVERT(type, expr): the target type comes first; parsed into a TryCast.
            to = self._parse_types()
            self._match(TokenType.COMMA)
            this = self._parse_bitwise()
            return self.expression(exp.TryCast, this=this, to=to, safe=safe)

        def _parse_approximate_count(self) -> t.Optional[exp.ApproxDistinct]:
            # After APPROXIMATE we expect COUNT(DISTINCT ...); otherwise backtrack.
            index = self._index - 1
            func = self._parse_function()

            if isinstance(func, exp.Count) and isinstance(func.this, exp.Distinct):
                return self.expression(exp.ApproxDistinct, this=seq_get(func.this.expressions, 0))
            self._retreat(index)
            return None

        def _parse_query_modifiers(
            self, this: t.Optional[exp.Expression]
        ) -> t.Optional[exp.Expression]:
            this = super()._parse_query_modifiers(this)

            if this:
                refs = set()

                for i, join in enumerate(this.args.get("joins", [])):
                    refs.add(
                        (
                            this.args["from"] if i == 0 else this.args["joins"][i - 1]
                        ).alias_or_name.lower()
                    )
                    table = join.this

                    # A join target whose first name part matches a preceding FROM/JOIN
                    # alias is really a column reference, so rewrite it as one.
                    if isinstance(table, exp.Table):
                        if table.parts[0].name.lower() in refs:
                            table.replace(table.to_column())
            return this

    class Tokenizer(Postgres.Tokenizer):
        BIT_STRINGS = []
        HEX_STRINGS = []
        STRING_ESCAPES = ["\\", "'"]

        KEYWORDS = {
            **Postgres.Tokenizer.KEYWORDS,
            "HLLSKETCH": TokenType.HLLSKETCH,
            "SUPER": TokenType.SUPER,
            "TOP": TokenType.TOP,
            "UNLOAD": TokenType.COMMAND,
            "VARBYTE": TokenType.VARBINARY,
        }

        # Redshift allows # to appear as a table identifier prefix
        SINGLE_TOKENS = Postgres.Tokenizer.SINGLE_TOKENS.copy()
        SINGLE_TOKENS.pop("#")

    class Generator(Postgres.Generator):
        LOCKING_READS_SUPPORTED = False
        QUERY_HINTS = False
        VALUES_AS_TABLE = False
        TZ_TO_WITH_TIME_ZONE = True
        NVL2_SUPPORTED = True
        LAST_DAY_SUPPORTS_DATE_PART = False

        TYPE_MAPPING = {
            **Postgres.Generator.TYPE_MAPPING,
            exp.DataType.Type.BINARY: "VARBYTE",
            exp.DataType.Type.INT: "INTEGER",
            exp.DataType.Type.TIMETZ: "TIME",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.VARBINARY: "VARBYTE",
        }

        TRANSFORMS = {
            **Postgres.Generator.TRANSFORMS,
            exp.Concat: concat_to_dpipe_sql,
            exp.ConcatWs: concat_ws_to_dpipe_sql,
            exp.ApproxDistinct: lambda self,
            e: f"APPROXIMATE COUNT(DISTINCT {self.sql(e, 'this')})",
            exp.CurrentTimestamp: lambda self, e: (
                "SYSDATE" if e.args.get("transaction") else "GETDATE()"
            ),
            exp.DateAdd: date_delta_sql("DATEADD"),
            exp.DateDiff: date_delta_sql("DATEDIFF"),
            exp.DistKeyProperty: lambda self, e: f"DISTKEY({e.name})",
            exp.DistStyleProperty: lambda self, e: self.naked_property(e),
            exp.FromBase: rename_func("STRTOL"),
            exp.GeneratedAsIdentityColumnConstraint: generatedasidentitycolumnconstraint_sql,
            exp.JSONExtract: _json_extract_sql,
            exp.JSONExtractScalar: _json_extract_sql,
            exp.GroupConcat: rename_func("LISTAGG"),
            exp.ParseJSON: rename_func("JSON_PARSE"),
            exp.Select: transforms.preprocess(
                [transforms.eliminate_distinct_on, transforms.eliminate_semi_and_anti_joins]
            ),
            exp.SortKeyProperty: lambda self,
            e: f"{'COMPOUND ' if e.args['compound'] else ''}SORTKEY({self.format_args(*e.this)})",
            exp.TableSample: no_tablesample_sql,
            exp.TsOrDsAdd: date_delta_sql("DATEADD"),
            exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
        }

        # Postgres maps exp.Pivot to no_pivot_sql, but Redshift supports pivots
        TRANSFORMS.pop(exp.Pivot)

        # Redshift uses the POW | POWER (expr1, expr2) syntax instead of expr1 ^ expr2 (postgres)
        TRANSFORMS.pop(exp.Pow)

        # Redshift supports ANY_VALUE(..)
        TRANSFORMS.pop(exp.AnyValue)

        # Redshift supports LAST_DAY(..)
        TRANSFORMS.pop(exp.LastDay)

        RESERVED_KEYWORDS = {*Postgres.Generator.RESERVED_KEYWORDS, "snapshot", "type"}

        def with_properties(self, properties: exp.Properties) -> str:
            """Redshift doesn't have `WITH` as part of its properties clause, so we remove it."""
            return self.properties(properties, prefix=" ", suffix="")

        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
            if expression.is_type(exp.DataType.Type.JSON):
                # Redshift doesn't support a JSON type, so casting to it is treated as a noop
                return self.sql(expression, "this")

            return super().cast_sql(expression, safe_prefix=safe_prefix)

        def datatype_sql(self, expression: exp.DataType) -> str:
            """
            Redshift converts the `TEXT` data type to `VARCHAR(255)` by default when people more generally mean
            VARCHAR of max length which is `VARCHAR(max)` in Redshift. Therefore if we get a `TEXT` data type
            without precision we convert it to `VARCHAR(max)` and if it does have precision then we just convert
            `TEXT` to `VARCHAR`.
            """
            if expression.is_type("text"):
                expression.set("this", exp.DataType.Type.VARCHAR)
                precision = expression.args.get("expressions")

                if not precision:
                    expression.append("expressions", exp.var("MAX"))

            return super().datatype_sql(expression)
Specifies the strategy according to which identifiers should be normalized.
Determines whether or not user-defined data types are supported.
Associates this dialect's time formats with their equivalent Python `strftime` formats.
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_SEMI_ANTI_JOIN
- NORMALIZE_FUNCTIONS
- LOG_BASE_FIRST
- SAFE_DIVISION
- DATE_FORMAT
- DATEINT_FORMAT
- FORMAT_MAPPING
- ESCAPE_SEQUENCES
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- get_or_raise
- format_time
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
class Parser(Postgres.Parser):
    """Redshift-specific parser overrides on top of the Postgres parser."""

    FUNCTIONS = {
        **Postgres.Parser.FUNCTIONS,
        "ADD_MONTHS": lambda args: exp.TsOrDsAdd(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            unit=exp.var("month"),
            return_type=exp.DataType.build("TIMESTAMP"),
        ),
        "DATEADD": _parse_date_delta(exp.TsOrDsAdd),
        "DATE_ADD": _parse_date_delta(exp.TsOrDsAdd),
        "DATEDIFF": _parse_date_delta(exp.TsOrDsDiff),
        "DATE_DIFF": _parse_date_delta(exp.TsOrDsDiff),
        "GETDATE": exp.CurrentTimestamp.from_arg_list,
        "JSON_EXTRACT_PATH_TEXT": parse_json_extract_path(
            exp.JSONExtractScalar, supports_null_if_invalid=True
        ),
        "LISTAGG": exp.GroupConcat.from_arg_list,
        "STRTOL": exp.FromBase.from_arg_list,
    }

    NO_PAREN_FUNCTION_PARSERS = {
        **Postgres.Parser.NO_PAREN_FUNCTION_PARSERS,
        "APPROXIMATE": lambda self: self._parse_approximate_count(),
        "SYSDATE": lambda self: self.expression(exp.CurrentTimestamp, transaction=True),
    }

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
    ) -> t.Optional[exp.Expression]:
        # Redshift supports UNPIVOTing SUPER objects, e.g. `UNPIVOT foo.obj[0] AS val AT attr`
        unpivot = self._match(TokenType.UNPIVOT)
        table = super()._parse_table(
            schema=schema,
            joins=joins,
            alias_tokens=alias_tokens,
            parse_bracket=parse_bracket,
            is_db_reference=is_db_reference,
        )

        return self.expression(exp.Pivot, this=table, unpivot=True) if unpivot else table

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        this = super()._parse_types(
            check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
        )

        # VARCHAR(MAX): the MAX is parsed as a column reference, so normalize it to a var.
        if (
            isinstance(this, exp.DataType)
            and this.is_type("varchar")
            and this.expressions
            and this.expressions[0].this == exp.column("MAX")
        ):
            this.set("expressions", [exp.var("MAX")])

        return this

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        # CONVERT(type, expr): the target type comes first; parsed into a TryCast.
        to = self._parse_types()
        self._match(TokenType.COMMA)
        this = self._parse_bitwise()
        return self.expression(exp.TryCast, this=this, to=to, safe=safe)

    def _parse_approximate_count(self) -> t.Optional[exp.ApproxDistinct]:
        # After APPROXIMATE we expect COUNT(DISTINCT ...); otherwise backtrack.
        index = self._index - 1
        func = self._parse_function()

        if isinstance(func, exp.Count) and isinstance(func.this, exp.Distinct):
            return self.expression(exp.ApproxDistinct, this=seq_get(func.this.expressions, 0))
        self._retreat(index)
        return None

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        this = super()._parse_query_modifiers(this)

        if this:
            refs = set()

            for i, join in enumerate(this.args.get("joins", [])):
                refs.add(
                    (
                        this.args["from"] if i == 0 else this.args["joins"][i - 1]
                    ).alias_or_name.lower()
                )
                table = join.this

                # A join target whose first name part matches a preceding FROM/JOIN
                # alias is really a column reference, so rewrite it as one.
                if isinstance(table, exp.Table):
                    if table.parts[0].name.lower() in refs:
                        table.replace(table.to_column())
        return this
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ID_VAR_TOKENS
- INTERVAL_VARS
- TABLE_ALIAS_TOKENS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- EQUALITY
- COMPARISON
- TERM
- FACTOR
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- MODIFIABLES
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_UNION
- UNION_MODIFIERS
- NO_PAREN_IF_COMMANDS
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
164 class Tokenizer(Postgres.Tokenizer): 165 BIT_STRINGS = [] 166 HEX_STRINGS = [] 167 STRING_ESCAPES = ["\\", "'"] 168 169 KEYWORDS = { 170 **Postgres.Tokenizer.KEYWORDS, 171 "HLLSKETCH": TokenType.HLLSKETCH, 172 "SUPER": TokenType.SUPER, 173 "TOP": TokenType.TOP, 174 "UNLOAD": TokenType.COMMAND, 175 "VARBYTE": TokenType.VARBINARY, 176 } 177 178 # Redshift allows # to appear as a table identifier prefix 179 SINGLE_TOKENS = Postgres.Tokenizer.SINGLE_TOKENS.copy() 180 SINGLE_TOKENS.pop("#")
Inherited Members
class Generator(Postgres.Generator):
    """Redshift SQL generator, overriding Postgres defaults where the dialects diverge."""

    LOCKING_READS_SUPPORTED = False
    QUERY_HINTS = False
    VALUES_AS_TABLE = False
    TZ_TO_WITH_TIME_ZONE = True
    NVL2_SUPPORTED = True
    LAST_DAY_SUPPORTS_DATE_PART = False

    TYPE_MAPPING = {
        **Postgres.Generator.TYPE_MAPPING,
        exp.DataType.Type.BINARY: "VARBYTE",
        exp.DataType.Type.INT: "INTEGER",
        exp.DataType.Type.TIMETZ: "TIME",
        exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
        exp.DataType.Type.VARBINARY: "VARBYTE",
    }

    TRANSFORMS = {
        **Postgres.Generator.TRANSFORMS,
        exp.Concat: concat_to_dpipe_sql,
        exp.ConcatWs: concat_ws_to_dpipe_sql,
        exp.ApproxDistinct: lambda self,
        e: f"APPROXIMATE COUNT(DISTINCT {self.sql(e, 'this')})",
        exp.CurrentTimestamp: lambda self, e: (
            "SYSDATE" if e.args.get("transaction") else "GETDATE()"
        ),
        exp.DateAdd: date_delta_sql("DATEADD"),
        exp.DateDiff: date_delta_sql("DATEDIFF"),
        exp.DistKeyProperty: lambda self, e: f"DISTKEY({e.name})",
        exp.DistStyleProperty: lambda self, e: self.naked_property(e),
        exp.FromBase: rename_func("STRTOL"),
        exp.GeneratedAsIdentityColumnConstraint: generatedasidentitycolumnconstraint_sql,
        exp.JSONExtract: _json_extract_sql,
        exp.JSONExtractScalar: _json_extract_sql,
        exp.GroupConcat: rename_func("LISTAGG"),
        exp.ParseJSON: rename_func("JSON_PARSE"),
        exp.Select: transforms.preprocess(
            [transforms.eliminate_distinct_on, transforms.eliminate_semi_and_anti_joins]
        ),
        exp.SortKeyProperty: lambda self,
        e: f"{'COMPOUND ' if e.args['compound'] else ''}SORTKEY({self.format_args(*e.this)})",
        exp.TableSample: no_tablesample_sql,
        exp.TsOrDsAdd: date_delta_sql("DATEADD"),
        exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
    }

    # Postgres maps exp.Pivot to no_pivot_sql, but Redshift supports pivots
    TRANSFORMS.pop(exp.Pivot)

    # Redshift uses the POW | POWER (expr1, expr2) syntax instead of expr1 ^ expr2 (postgres)
    TRANSFORMS.pop(exp.Pow)

    # Redshift supports ANY_VALUE(..)
    TRANSFORMS.pop(exp.AnyValue)

    # Redshift supports LAST_DAY(..)
    TRANSFORMS.pop(exp.LastDay)

    RESERVED_KEYWORDS = {*Postgres.Generator.RESERVED_KEYWORDS, "snapshot", "type"}

    def with_properties(self, properties: exp.Properties) -> str:
        """Redshift doesn't have `WITH` as part of its properties clause, so we remove it."""
        return self.properties(properties, prefix=" ", suffix="")

    def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
        if expression.is_type(exp.DataType.Type.JSON):
            # Redshift doesn't support a JSON type, so casting to it is treated as a noop
            return self.sql(expression, "this")

        return super().cast_sql(expression, safe_prefix=safe_prefix)

    def datatype_sql(self, expression: exp.DataType) -> str:
        """
        Redshift converts the `TEXT` data type to `VARCHAR(255)` by default when people more generally mean
        VARCHAR of max length which is `VARCHAR(max)` in Redshift. Therefore if we get a `TEXT` data type
        without precision we convert it to `VARCHAR(max)` and if it does have precision then we just convert
        `TEXT` to `VARCHAR`.
        """
        if expression.is_type("text"):
            expression.set("this", exp.DataType.Type.VARCHAR)
            precision = expression.args.get("expressions")

            if not precision:
                expression.append("expressions", exp.var("MAX"))

        return super().datatype_sql(expression)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether or not to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether or not to normalize identifiers to lowercase. Default: False.
- pad: Determines the pad size in a formatted string. Default: 2.
- indent: Determines the indentation size in a formatted string. Default: 2.
- normalize_functions: Whether or not to normalize all function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Determines whether or not the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether or not to preserve comments in the output SQL code. Default: True
242 def with_properties(self, properties: exp.Properties) -> str: 243 """Redshift doesn't have `WITH` as part of their with_properties so we remove it""" 244 return self.properties(properties, prefix=" ", suffix="")
Redshift doesn't include `WITH` as part of its properties clause, so we remove it.
246 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 247 if expression.is_type(exp.DataType.Type.JSON): 248 # Redshift doesn't support a JSON type, so casting to it is treated as a noop 249 return self.sql(expression, "this") 250 251 return super().cast_sql(expression, safe_prefix=safe_prefix)
253 def datatype_sql(self, expression: exp.DataType) -> str: 254 """ 255 Redshift converts the `TEXT` data type to `VARCHAR(255)` by default when people more generally mean 256 VARCHAR of max length which is `VARCHAR(max)` in Redshift. Therefore if we get a `TEXT` data type 257 without precision we convert it to `VARCHAR(max)` and if it does have precision then we just convert 258 `TEXT` to `VARCHAR`. 259 """ 260 if expression.is_type("text"): 261 expression.set("this", exp.DataType.Type.VARCHAR) 262 precision = expression.args.get("expressions") 263 264 if not precision: 265 expression.append("expressions", exp.var("MAX")) 266 267 return super().datatype_sql(expression)
Redshift converts the `TEXT` data type to `VARCHAR(255)` by default, when people more generally mean a VARCHAR of max length, which is `VARCHAR(max)` in Redshift. Therefore, if we get a `TEXT` data type without precision, we convert it to `VARCHAR(max)`, and if it does have precision, we just convert `TEXT` to `VARCHAR`.
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- EXPLICIT_UNION
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- LIMIT_ONLY_LITERALS
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- COLUMN_JOIN_MARKS_SUPPORTED
- EXTRACT_ALLOWS_QUOTES
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_CREATE_TABLE_LIKE
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- STAR_MAPPING
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- KEY_VALUE_DEFINITIONS
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- except_op
- fetch_sql
- filter_sql
- hint_sql
- index_sql
- identifier_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- intersect_op
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognize_sql
- query_modifiers
- offset_limit_modifiers
- after_having_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- union_sql
- union_op
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- currentdate_sql
- currenttimestamp_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- renametable_sql
- renamecolumn_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- or_sql
- slice_sql
- sub_sql
- trycast_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- set_operation
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- sqlglot.dialects.postgres.Postgres.Generator
- SINGLE_STRING_INTERVAL
- RENAME_TABLE_WITH_DB
- JOIN_HINTS
- TABLE_HINTS
- PARAMETER_TOKEN
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_SEED_KEYWORD
- SUPPORTS_SELECT_INTO
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- SUPPORTS_UNLOGGED_TABLES
- LIKE_PROPERTY_INSIDE_SCHEMA
- SUPPORTED_JSON_PATH_PARTS
- PROPERTIES_LOCATION
- bracket_sql
- matchagainst_sql