sqlglot.dialects.tsql
from __future__ import annotations

import re
import typing as t

from sqlglot import exp, generator, parser, tokens
from sqlglot.dialects.dialect import (
    Dialect,
    min_or_least,
    parse_date_delta,
    rename_func,
)
from sqlglot.expressions import DataType
from sqlglot.helper import seq_get
from sqlglot.time import format_time
from sqlglot.tokens import TokenType

# Date parts that DATENAME renders as a full name (weekday / month name)
# instead of a number; merged over TSQL.time_mapping when requested.
FULL_FORMAT_TIME_MAPPING = {
    "weekday": "%A",
    "dw": "%A",
    "w": "%A",
    "month": "%B",
    "mm": "%B",
    "m": "%B",
}

# Maps T-SQL date-part names and abbreviations to a canonical unit name.
DATE_DELTA_INTERVAL = {
    "year": "year",
    "yyyy": "year",
    "yy": "year",
    "quarter": "quarter",
    "qq": "quarter",
    "q": "quarter",
    "month": "month",
    "mm": "month",
    "m": "month",
    "week": "week",
    "ww": "week",
    "wk": "week",
    "day": "day",
    "dd": "day",
    "d": "day",
}


# Matches any run of day / month / year / hour / second letters in a format string.
DATE_FMT_RE = re.compile("([dD]{1,2})|([mM]{1,2})|([yY]{1,4})|([hH]{1,2})|([sS]{1,2})")

# N = Numeric, C=Currency
TRANSPILE_SAFE_NUMBER_FMT = {"N", "C"}


def _format_time_lambda(exp_class, full_format_mapping=None, default=None):
    """Build a parser callback for functions called as ``FUNC(datepart, value)``.

    Args:
        exp_class: expression class to instantiate; it receives ``this`` and ``format``.
        full_format_mapping: when truthy, ``FULL_FORMAT_TIME_MAPPING`` is merged
            over ``TSQL.time_mapping`` before the format is translated.
        default: fallback format used when the first argument has an empty name;
            ``True`` selects ``TSQL.time_format``.

    Returns:
        A function that takes the parsed argument list and returns an ``exp_class`` node.
    """

    def _format_time(args):
        return exp_class(
            this=seq_get(args, 1),
            format=exp.Literal.string(
                format_time(
                    seq_get(args, 0).name or (TSQL.time_format if default is True else default),
                    {**TSQL.time_mapping, **FULL_FORMAT_TIME_MAPPING}
                    if full_format_mapping
                    else TSQL.time_mapping,
                )
            ),
        )

    return _format_time


def _parse_format(args):
    """Parse ``FORMAT(value, format)`` into ``exp.NumberToStr`` or ``exp.TimeToStr``.

    The format is treated as numeric when its name is one of
    ``TRANSPILE_SAFE_NUMBER_FMT`` or when ``DATE_FMT_RE`` finds no date/time
    letters in it; otherwise it is translated as a date/time format.
    """
    fmt = seq_get(args, 1)
    number_fmt = fmt.name in TRANSPILE_SAFE_NUMBER_FMT or not DATE_FMT_RE.search(fmt.this)
    if number_fmt:
        return exp.NumberToStr(this=seq_get(args, 0), format=fmt)
    return exp.TimeToStr(
        this=seq_get(args, 0),
        format=exp.Literal.string(
            # Single-character formats are looked up in the dedicated
            # format_time_mapping; longer ones use the generic time mapping.
            format_time(fmt.name, TSQL.format_time_mapping)
            if len(fmt.name) == 1
            else format_time(fmt.name, TSQL.time_mapping)
        ),
    )


def _parse_eomonth(args):
    """Parse ``EOMONTH(date[, month_lag])`` into ``exp.LastDateOfMonth``.

    When a month lag is given, the date is first wrapped in an ``exp.DateAdd``
    of that many months. Note that ``args`` is mutated in that case.
    """
    date = seq_get(args, 0)
    month_lag = seq_get(args, 1)
    unit = DATE_DELTA_INTERVAL.get("month")

    if month_lag is None:
        return exp.LastDateOfMonth(this=date)

    # Remove month lag argument in parser as its compared with the number of arguments of the resulting class
    args.remove(month_lag)

    return exp.LastDateOfMonth(this=exp.DateAdd(this=date, expression=month_lag, unit=unit))


def generate_date_delta_with_unit_sql(self, e):
    """Render ``exp.DateAdd`` as ``DATEADD`` and anything else as ``DATEDIFF``.

    Arguments are emitted in the order unit, ``e.expression``, ``e.this``.
    """
    func = "DATEADD" if isinstance(e, exp.DateAdd) else "DATEDIFF"
    return self.func(func, e.text("unit"), e.expression, e.this)


def _format_sql(self, e):
    """Render ``exp.NumberToStr`` / ``exp.TimeToStr`` as a ``FORMAT(...)`` call.

    Number formats are passed through unchanged; time formats are translated
    back through ``TSQL.inverse_time_mapping``.
    """
    fmt = (
        e.args["format"]
        if isinstance(e, exp.NumberToStr)
        else exp.Literal.string(format_time(e.text("format"), TSQL.inverse_time_mapping))
    )
    return self.func("FORMAT", e.this, fmt)


def _string_agg_sql(self, e):
    """Render ``exp.GroupConcat`` as ``STRING_AGG(expr, separator)``.

    An ordering, if present, becomes a ``WITHIN GROUP (...)`` suffix. DISTINCT
    is reported as unsupported and dropped. The separator defaults to ``','``.
    """
    # Work on a copy because nodes are popped out of the tree below.
    e = e.copy()

    this = e.this
    distinct = e.find(exp.Distinct)
    if distinct:
        # exp.Distinct can appear below an exp.Order or an exp.GroupConcat expression
        self.unsupported("T-SQL STRING_AGG doesn't support DISTINCT.")
        this = distinct.pop().expressions[0]

    order = ""
    if isinstance(e.this, exp.Order):
        if e.this.this:
            this = e.this.this.pop()
        order = f" WITHIN GROUP ({self.sql(e.this)[1:]})"  # Order has a leading space

    separator = e.args.get("separator") or exp.Literal.string(",")
    return f"STRING_AGG({self.format_args(this, separator)}){order}"


class TSQL(Dialect):
    """T-SQL dialect definition: format mappings plus tokenizer, parser and generator."""

    null_ordering = "nulls_are_small"
    time_format = "'yyyy-mm-dd hh:mm:ss'"

    # T-SQL date parts / format specifiers -> strftime-style codes.
    time_mapping = {
        "year": "%Y",
        "qq": "%q",
        "q": "%q",
        "quarter": "%q",
        "dayofyear": "%j",
        "day": "%d",
        "dy": "%d",
        "y": "%Y",
        "week": "%W",
        "ww": "%W",
        "wk": "%W",
        "hour": "%h",
        "hh": "%I",
        "minute": "%M",
        "mi": "%M",
        "n": "%M",
        "second": "%S",
        "ss": "%S",
        "s": "%-S",
        "millisecond": "%f",
        "ms": "%f",
        "weekday": "%W",
        "dw": "%W",
        "month": "%m",
        "mm": "%M",
        "m": "%-M",
        "Y": "%Y",
        "YYYY": "%Y",
        "YY": "%y",
        "MMMM": "%B",
        "MMM": "%b",
        "MM": "%m",
        "M": "%-m",
        "dd": "%d",
        "d": "%-d",
        "HH": "%H",
        "H": "%-H",
        "h": "%-I",
        "S": "%f",
        "yyyy": "%Y",
        "yy": "%y",
    }

    # CONVERT style number -> strftime-style pattern. Styles below 100 use a
    # two-digit year; their 1xx counterparts use a four-digit year.
    convert_format_mapping = {
        "0": "%b %d %Y %-I:%M%p",
        "1": "%m/%d/%y",
        "2": "%y.%m.%d",
        "3": "%d/%m/%y",
        "4": "%d.%m.%y",
        "5": "%d-%m-%y",
        "6": "%d %b %y",
        "7": "%b %d, %y",
        "8": "%H:%M:%S",
        "9": "%b %d %Y %-I:%M:%S:%f%p",
        # Styles 10-13 mirror 110-113 with a two-digit year where applicable.
        "10": "%m-%d-%y",
        "11": "%y/%m/%d",
        "12": "%y%m%d",
        "13": "%d %b %Y %H:%M:%S:%f",
        "14": "%H:%M:%S:%f",
        "20": "%Y-%m-%d %H:%M:%S",
        "21": "%Y-%m-%d %H:%M:%S.%f",
        "22": "%m/%d/%y %-I:%M:%S %p",
        "23": "%Y-%m-%d",
        "24": "%H:%M:%S",
        "25": "%Y-%m-%d %H:%M:%S.%f",
        "100": "%b %d %Y %-I:%M%p",
        "101": "%m/%d/%Y",
        "102": "%Y.%m.%d",
        "103": "%d/%m/%Y",
        "104": "%d.%m.%Y",
        "105": "%d-%m-%Y",
        "106": "%d %b %Y",
        "107": "%b %d, %Y",
        "108": "%H:%M:%S",
        "109": "%b %d %Y %-I:%M:%S:%f%p",
        "110": "%m-%d-%Y",
        "111": "%Y/%m/%d",
        "112": "%Y%m%d",
        "113": "%d %b %Y %H:%M:%S:%f",
        "114": "%H:%M:%S:%f",
        "120": "%Y-%m-%d %H:%M:%S",
        "121": "%Y-%m-%d %H:%M:%S.%f",
    }
    # Single-character FORMAT() specifiers -> strftime-style pattern.
    # not sure if complete
    # NOTE(review): the "u" and "U" entries use "%M"/"%D" in the date part,
    # unlike the other entries — confirm they are intended.
    format_time_mapping = {
        "y": "%B %Y",
        "d": "%m/%d/%Y",
        "H": "%-H",
        "h": "%-I",
        "s": "%Y-%m-%d %H:%M:%S",
        "D": "%A,%B,%Y",
        "f": "%A,%B,%Y %-I:%M %p",
        "F": "%A,%B,%Y %-I:%M:%S %p",
        "g": "%m/%d/%Y %-I:%M %p",
        "G": "%m/%d/%Y %-I:%M:%S %p",
        "M": "%B %-d",
        "m": "%B %-d",
        "O": "%Y-%m-%dT%H:%M:%S",
        "u": "%Y-%M-%D %H:%M:%S%z",
        "U": "%A, %B %D, %Y %H:%M:%S%z",
        "T": "%-I:%M:%S %p",
        "t": "%-I:%M",
        "Y": "%a %Y",
    }

    class Tokenizer(tokens.Tokenizer):
        """Tokenizer settings specific to T-SQL."""

        # Identifiers may be wrapped in double quotes or in square brackets.
        IDENTIFIERS = ['"', ("[", "]")]

        QUOTES = ["'", '"']

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "DATETIME2": TokenType.DATETIME,
            "DATETIMEOFFSET": TokenType.TIMESTAMPTZ,
            "DECLARE": TokenType.COMMAND,
            "IMAGE": TokenType.IMAGE,
            "MONEY": TokenType.MONEY,
            "NTEXT": TokenType.TEXT,
            "NVARCHAR(MAX)": TokenType.TEXT,
            "PRINT": TokenType.COMMAND,
            "PROC": TokenType.PROCEDURE,
            "REAL": TokenType.FLOAT,
            "ROWVERSION": TokenType.ROWVERSION,
            "SMALLDATETIME": TokenType.DATETIME,
            "SMALLMONEY": TokenType.SMALLMONEY,
            "SQL_VARIANT": TokenType.VARIANT,
            "TIME": TokenType.TIMESTAMP,
            "TOP": TokenType.TOP,
            "UNIQUEIDENTIFIER": TokenType.UNIQUEIDENTIFIER,
            "VARCHAR(MAX)": TokenType.TEXT,
            "XML": TokenType.XML,
        }

        # TSQL allows @, # to appear as a variable/identifier prefix
        SINGLE_TOKENS = tokens.Tokenizer.SINGLE_TOKENS.copy()
        del SINGLE_TOKENS["@"]
        del SINGLE_TOKENS["#"]

    class Parser(parser.Parser):
        """Parser extensions for T-SQL functions, CONVERT, FOR SYSTEM_TIME and RETURNS."""

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,  # type: ignore
            # CHARINDEX takes the substring first and the searched string second.
            "CHARINDEX": lambda args: exp.StrPosition(
                this=seq_get(args, 1),
                substr=seq_get(args, 0),
                position=seq_get(args, 2),
            ),
            "DATEADD": parse_date_delta(exp.DateAdd, unit_mapping=DATE_DELTA_INTERVAL),
            "DATEDIFF": parse_date_delta(exp.DateDiff, unit_mapping=DATE_DELTA_INTERVAL),
            "DATENAME": _format_time_lambda(exp.TimeToStr, full_format_mapping=True),
            "DATEPART": _format_time_lambda(exp.TimeToStr),
            "EOMONTH": _parse_eomonth,
            "FORMAT": _parse_format,
            "GETDATE": exp.CurrentTimestamp.from_arg_list,
            "IIF": exp.If.from_arg_list,
            "ISNULL": exp.Coalesce.from_arg_list,
            "JSON_VALUE": exp.JSONExtractScalar.from_arg_list,
            "LEN": exp.Length.from_arg_list,
            "REPLICATE": exp.Repeat.from_arg_list,
            "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
            "SYSDATETIME": exp.CurrentTimestamp.from_arg_list,
        }

        # Character types that get a default length in CONVERT when none is given.
        VAR_LENGTH_DATATYPES = {
            DataType.Type.NVARCHAR,
            DataType.Type.VARCHAR,
            DataType.Type.CHAR,
            DataType.Type.NCHAR,
        }

        # Tokens accepted as the table variable name in a RETURNS clause.
        RETURNS_TABLE_TOKENS = parser.Parser.ID_VAR_TOKENS - {  # type: ignore
            TokenType.TABLE,
            *parser.Parser.TYPE_TOKENS,  # type: ignore
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,  # type: ignore
            TokenType.END: lambda self: self._parse_command(),
        }

        def _parse_system_time(self) -> t.Optional[exp.Expression]:
            """Parse an optional ``FOR SYSTEM_TIME ...`` clause.

            Returns:
                An ``exp.SystemTime`` node whose ``kind`` is one of ``"AS OF"``,
                ``"FROM"``, ``"BETWEEN"``, ``"CONTAINED IN"`` or ``"ALL"``, or
                ``None`` when the clause is absent. An unrecognised form is
                reported through ``raise_error``.
            """
            if not self._match_text_seq("FOR", "SYSTEM_TIME"):
                return None

            if self._match_text_seq("AS", "OF"):
                system_time = self.expression(
                    exp.SystemTime, this=self._parse_bitwise(), kind="AS OF"
                )
            elif self._match_set((TokenType.FROM, TokenType.BETWEEN)):
                # Upper-case the keyword so the generator's "FROM"/"BETWEEN"
                # comparisons match however the query spelled it.
                kind = self._prev.text.upper()
                this = self._parse_bitwise()
                self._match_texts(("TO", "AND"))
                expression = self._parse_bitwise()
                system_time = self.expression(
                    exp.SystemTime, this=this, expression=expression, kind=kind
                )
            elif self._match_text_seq("CONTAINED", "IN"):
                args = self._parse_wrapped_csv(self._parse_bitwise)
                system_time = self.expression(
                    exp.SystemTime,
                    this=seq_get(args, 0),
                    expression=seq_get(args, 1),
                    kind="CONTAINED IN",
                )
            elif self._match(TokenType.ALL):
                system_time = self.expression(exp.SystemTime, kind="ALL")
            else:
                system_time = None
                self.raise_error("Unable to parse FOR SYSTEM_TIME clause")

            return system_time

        def _parse_table_parts(self, schema: bool = False) -> exp.Expression:
            """Parse table parts and attach any trailing ``FOR SYSTEM_TIME`` clause."""
            table = super()._parse_table_parts(schema=schema)
            table.set("system_time", self._parse_system_time())
            return table

        def _parse_returns(self) -> exp.Expression:
            """Parse ``RETURNS``, recording an optional leading table variable name."""
            table = self._parse_id_var(any_token=False, tokens=self.RETURNS_TABLE_TOKENS)
            returns = super()._parse_returns()
            returns.set("table", table)
            return returns

        def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
            """Parse the arguments of ``CONVERT(type, value[, style])``.

            Args:
                strict: selects ``exp.Cast`` (True) or ``exp.TryCast`` (False).

            Returns:
                A string/date conversion expression when a style is given and
                the target type is handled, a plain cast otherwise, or ``None``
                when the type or the value is missing.

            Raises:
                ValueError: if the style is not in ``TSQL.convert_format_mapping``.
            """
            to = self._parse_types()
            self._match(TokenType.COMMA)
            this = self._parse_conjunction()

            if not to or not this:
                return None

            # Retrieve length of datatype and override to default if not specified
            if seq_get(to.expressions, 0) is None and to.this in self.VAR_LENGTH_DATATYPES:
                to = exp.DataType.build(to.this, expressions=[exp.Literal.number(30)], nested=False)

            # Check whether a conversion with format is applicable
            if self._match(TokenType.COMMA):
                format_val = self._parse_number()
                format_val_name = format_val.name if format_val else ""

                if format_val_name not in TSQL.convert_format_mapping:
                    raise ValueError(
                        f"CONVERT function at T-SQL does not support format style {format_val_name}"
                    )

                format_norm = exp.Literal.string(TSQL.convert_format_mapping[format_val_name])

                # Check whether the convert entails a string to date format
                if to.this == DataType.Type.DATE:
                    return self.expression(exp.StrToDate, this=this, format=format_norm)
                # Check whether the convert entails a string to datetime format
                elif to.this == DataType.Type.DATETIME:
                    return self.expression(exp.StrToTime, this=this, format=format_norm)
                # Check whether the convert entails a date to string format
                elif to.this in self.VAR_LENGTH_DATATYPES:
                    return self.expression(
                        exp.Cast if strict else exp.TryCast,
                        to=to,
                        this=self.expression(exp.TimeToStr, this=this, format=format_norm),
                    )
                elif to.this == DataType.Type.TEXT:
                    return self.expression(exp.TimeToStr, this=this, format=format_norm)

            # Entails a simple cast without any format requirement
            return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

        def _parse_user_defined_function(
            self, kind: t.Optional[TokenType] = None
        ) -> t.Optional[exp.Expression]:
            """Parse a user-defined function, allowing an unparenthesised parameter list.

            The base result is returned unchanged for functions, for results
            that are already ``exp.UserDefinedFunction``, or when the next token
            is an alias; otherwise a comma-separated parameter list is parsed.
            """
            this = super()._parse_user_defined_function(kind=kind)

            if (
                kind == TokenType.FUNCTION
                or isinstance(this, exp.UserDefinedFunction)
                or self._match(TokenType.ALIAS, advance=False)
            ):
                return this

            expressions = self._parse_csv(self._parse_function_parameter)
            return self.expression(exp.UserDefinedFunction, this=this, expressions=expressions)

    class Generator(generator.Generator):
        """SQL generator overrides for T-SQL."""

        LOCKING_READS_SUPPORTED = True

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,  # type: ignore
            exp.DataType.Type.INT: "INTEGER",
            exp.DataType.Type.DECIMAL: "NUMERIC",
            exp.DataType.Type.DATETIME: "DATETIME2",
            exp.DataType.Type.VARIANT: "SQL_VARIANT",
        }

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,  # type: ignore
            exp.DateAdd: generate_date_delta_with_unit_sql,
            exp.DateDiff: generate_date_delta_with_unit_sql,
            exp.CurrentDate: rename_func("GETDATE"),
            exp.CurrentTimestamp: rename_func("GETDATE"),
            exp.If: rename_func("IIF"),
            exp.NumberToStr: _format_sql,
            exp.TimeToStr: _format_sql,
            exp.GroupConcat: _string_agg_sql,
            exp.Min: min_or_least,
        }

        # Drop the inherited transform so returnsproperty_sql below is the
        # ReturnsProperty handler defined for this dialect.
        del TRANSFORMS[exp.ReturnsProperty]

        def systemtime_sql(self, expression: exp.SystemTime) -> str:
            """Render an ``exp.SystemTime`` node as a ``FOR SYSTEM_TIME`` clause."""
            prefix = "FOR SYSTEM_TIME"
            kind = expression.args["kind"]
            if kind == "ALL":
                return f"{prefix} ALL"

            lower = self.sql(expression, "this")
            if kind == "AS OF":
                return f"{prefix} AS OF {lower}"

            upper = self.sql(expression, "expression")
            if kind == "FROM":
                return f"{prefix} FROM {lower} TO {upper}"
            if kind == "BETWEEN":
                return f"{prefix} BETWEEN {lower} AND {upper}"

            # Any other kind is rendered as CONTAINED IN.
            return f"{prefix} CONTAINED IN ({lower}, {upper})"

        def returnsproperty_sql(self, expression: exp.ReturnsProperty) -> str:
            """Render ``RETURNS [table ]<type>`` for a ``ReturnsProperty`` node."""
            table_arg = expression.args.get("table")
            table_prefix = f"{table_arg} " if table_arg else ""
            return f"RETURNS {table_prefix}{self.sql(expression, 'this')}"
def
generate_date_delta_with_unit_sql(self, e):
class TSQL(Dialect):
    """T-SQL dialect definition: format mappings plus tokenizer, parser and generator."""

    null_ordering = "nulls_are_small"
    time_format = "'yyyy-mm-dd hh:mm:ss'"

    # T-SQL date parts / format specifiers -> strftime-style codes.
    time_mapping = {
        "year": "%Y",
        "qq": "%q",
        "q": "%q",
        "quarter": "%q",
        "dayofyear": "%j",
        "day": "%d",
        "dy": "%d",
        "y": "%Y",
        "week": "%W",
        "ww": "%W",
        "wk": "%W",
        "hour": "%h",
        "hh": "%I",
        "minute": "%M",
        "mi": "%M",
        "n": "%M",
        "second": "%S",
        "ss": "%S",
        "s": "%-S",
        "millisecond": "%f",
        "ms": "%f",
        "weekday": "%W",
        "dw": "%W",
        "month": "%m",
        "mm": "%M",
        "m": "%-M",
        "Y": "%Y",
        "YYYY": "%Y",
        "YY": "%y",
        "MMMM": "%B",
        "MMM": "%b",
        "MM": "%m",
        "M": "%-m",
        "dd": "%d",
        "d": "%-d",
        "HH": "%H",
        "H": "%-H",
        "h": "%-I",
        "S": "%f",
        "yyyy": "%Y",
        "yy": "%y",
    }

    # CONVERT style number -> strftime-style pattern. Styles below 100 use a
    # two-digit year; their 1xx counterparts use a four-digit year.
    convert_format_mapping = {
        "0": "%b %d %Y %-I:%M%p",
        "1": "%m/%d/%y",
        "2": "%y.%m.%d",
        "3": "%d/%m/%y",
        "4": "%d.%m.%y",
        "5": "%d-%m-%y",
        "6": "%d %b %y",
        "7": "%b %d, %y",
        "8": "%H:%M:%S",
        "9": "%b %d %Y %-I:%M:%S:%f%p",
        # Styles 10-13 mirror 110-113 with a two-digit year where applicable.
        "10": "%m-%d-%y",
        "11": "%y/%m/%d",
        "12": "%y%m%d",
        "13": "%d %b %Y %H:%M:%S:%f",
        "14": "%H:%M:%S:%f",
        "20": "%Y-%m-%d %H:%M:%S",
        "21": "%Y-%m-%d %H:%M:%S.%f",
        "22": "%m/%d/%y %-I:%M:%S %p",
        "23": "%Y-%m-%d",
        "24": "%H:%M:%S",
        "25": "%Y-%m-%d %H:%M:%S.%f",
        "100": "%b %d %Y %-I:%M%p",
        "101": "%m/%d/%Y",
        "102": "%Y.%m.%d",
        "103": "%d/%m/%Y",
        "104": "%d.%m.%Y",
        "105": "%d-%m-%Y",
        "106": "%d %b %Y",
        "107": "%b %d, %Y",
        "108": "%H:%M:%S",
        "109": "%b %d %Y %-I:%M:%S:%f%p",
        "110": "%m-%d-%Y",
        "111": "%Y/%m/%d",
        "112": "%Y%m%d",
        "113": "%d %b %Y %H:%M:%S:%f",
        "114": "%H:%M:%S:%f",
        "120": "%Y-%m-%d %H:%M:%S",
        "121": "%Y-%m-%d %H:%M:%S.%f",
    }
    # Single-character FORMAT() specifiers -> strftime-style pattern.
    # not sure if complete
    # NOTE(review): the "u" and "U" entries use "%M"/"%D" in the date part,
    # unlike the other entries — confirm they are intended.
    format_time_mapping = {
        "y": "%B %Y",
        "d": "%m/%d/%Y",
        "H": "%-H",
        "h": "%-I",
        "s": "%Y-%m-%d %H:%M:%S",
        "D": "%A,%B,%Y",
        "f": "%A,%B,%Y %-I:%M %p",
        "F": "%A,%B,%Y %-I:%M:%S %p",
        "g": "%m/%d/%Y %-I:%M %p",
        "G": "%m/%d/%Y %-I:%M:%S %p",
        "M": "%B %-d",
        "m": "%B %-d",
        "O": "%Y-%m-%dT%H:%M:%S",
        "u": "%Y-%M-%D %H:%M:%S%z",
        "U": "%A, %B %D, %Y %H:%M:%S%z",
        "T": "%-I:%M:%S %p",
        "t": "%-I:%M",
        "Y": "%a %Y",
    }

    class Tokenizer(tokens.Tokenizer):
        """Tokenizer settings specific to T-SQL."""

        # Identifiers may be wrapped in double quotes or in square brackets.
        IDENTIFIERS = ['"', ("[", "]")]

        QUOTES = ["'", '"']

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "DATETIME2": TokenType.DATETIME,
            "DATETIMEOFFSET": TokenType.TIMESTAMPTZ,
            "DECLARE": TokenType.COMMAND,
            "IMAGE": TokenType.IMAGE,
            "MONEY": TokenType.MONEY,
            "NTEXT": TokenType.TEXT,
            "NVARCHAR(MAX)": TokenType.TEXT,
            "PRINT": TokenType.COMMAND,
            "PROC": TokenType.PROCEDURE,
            "REAL": TokenType.FLOAT,
            "ROWVERSION": TokenType.ROWVERSION,
            "SMALLDATETIME": TokenType.DATETIME,
            "SMALLMONEY": TokenType.SMALLMONEY,
            "SQL_VARIANT": TokenType.VARIANT,
            "TIME": TokenType.TIMESTAMP,
            "TOP": TokenType.TOP,
            "UNIQUEIDENTIFIER": TokenType.UNIQUEIDENTIFIER,
            "VARCHAR(MAX)": TokenType.TEXT,
            "XML": TokenType.XML,
        }

        # TSQL allows @, # to appear as a variable/identifier prefix
        SINGLE_TOKENS = tokens.Tokenizer.SINGLE_TOKENS.copy()
        del SINGLE_TOKENS["@"]
        del SINGLE_TOKENS["#"]

    class Parser(parser.Parser):
        """Parser extensions for T-SQL functions, CONVERT, FOR SYSTEM_TIME and RETURNS."""

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,  # type: ignore
            # CHARINDEX takes the substring first and the searched string second.
            "CHARINDEX": lambda args: exp.StrPosition(
                this=seq_get(args, 1),
                substr=seq_get(args, 0),
                position=seq_get(args, 2),
            ),
            "DATEADD": parse_date_delta(exp.DateAdd, unit_mapping=DATE_DELTA_INTERVAL),
            "DATEDIFF": parse_date_delta(exp.DateDiff, unit_mapping=DATE_DELTA_INTERVAL),
            "DATENAME": _format_time_lambda(exp.TimeToStr, full_format_mapping=True),
            "DATEPART": _format_time_lambda(exp.TimeToStr),
            "EOMONTH": _parse_eomonth,
            "FORMAT": _parse_format,
            "GETDATE": exp.CurrentTimestamp.from_arg_list,
            "IIF": exp.If.from_arg_list,
            "ISNULL": exp.Coalesce.from_arg_list,
            "JSON_VALUE": exp.JSONExtractScalar.from_arg_list,
            "LEN": exp.Length.from_arg_list,
            "REPLICATE": exp.Repeat.from_arg_list,
            "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
            "SYSDATETIME": exp.CurrentTimestamp.from_arg_list,
        }

        # Character types that get a default length in CONVERT when none is given.
        VAR_LENGTH_DATATYPES = {
            DataType.Type.NVARCHAR,
            DataType.Type.VARCHAR,
            DataType.Type.CHAR,
            DataType.Type.NCHAR,
        }

        # Tokens accepted as the table variable name in a RETURNS clause.
        RETURNS_TABLE_TOKENS = parser.Parser.ID_VAR_TOKENS - {  # type: ignore
            TokenType.TABLE,
            *parser.Parser.TYPE_TOKENS,  # type: ignore
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,  # type: ignore
            TokenType.END: lambda self: self._parse_command(),
        }

        def _parse_system_time(self) -> t.Optional[exp.Expression]:
            """Parse an optional ``FOR SYSTEM_TIME ...`` clause.

            Returns:
                An ``exp.SystemTime`` node whose ``kind`` is one of ``"AS OF"``,
                ``"FROM"``, ``"BETWEEN"``, ``"CONTAINED IN"`` or ``"ALL"``, or
                ``None`` when the clause is absent. An unrecognised form is
                reported through ``raise_error``.
            """
            if not self._match_text_seq("FOR", "SYSTEM_TIME"):
                return None

            if self._match_text_seq("AS", "OF"):
                system_time = self.expression(
                    exp.SystemTime, this=self._parse_bitwise(), kind="AS OF"
                )
            elif self._match_set((TokenType.FROM, TokenType.BETWEEN)):
                # Upper-case the keyword so the generator's "FROM"/"BETWEEN"
                # comparisons match however the query spelled it.
                kind = self._prev.text.upper()
                this = self._parse_bitwise()
                self._match_texts(("TO", "AND"))
                expression = self._parse_bitwise()
                system_time = self.expression(
                    exp.SystemTime, this=this, expression=expression, kind=kind
                )
            elif self._match_text_seq("CONTAINED", "IN"):
                args = self._parse_wrapped_csv(self._parse_bitwise)
                system_time = self.expression(
                    exp.SystemTime,
                    this=seq_get(args, 0),
                    expression=seq_get(args, 1),
                    kind="CONTAINED IN",
                )
            elif self._match(TokenType.ALL):
                system_time = self.expression(exp.SystemTime, kind="ALL")
            else:
                system_time = None
                self.raise_error("Unable to parse FOR SYSTEM_TIME clause")

            return system_time

        def _parse_table_parts(self, schema: bool = False) -> exp.Expression:
            """Parse table parts and attach any trailing ``FOR SYSTEM_TIME`` clause."""
            table = super()._parse_table_parts(schema=schema)
            table.set("system_time", self._parse_system_time())
            return table

        def _parse_returns(self) -> exp.Expression:
            """Parse ``RETURNS``, recording an optional leading table variable name."""
            table = self._parse_id_var(any_token=False, tokens=self.RETURNS_TABLE_TOKENS)
            returns = super()._parse_returns()
            returns.set("table", table)
            return returns

        def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
            """Parse the arguments of ``CONVERT(type, value[, style])``.

            Args:
                strict: selects ``exp.Cast`` (True) or ``exp.TryCast`` (False).

            Returns:
                A string/date conversion expression when a style is given and
                the target type is handled, a plain cast otherwise, or ``None``
                when the type or the value is missing.

            Raises:
                ValueError: if the style is not in ``TSQL.convert_format_mapping``.
            """
            to = self._parse_types()
            self._match(TokenType.COMMA)
            this = self._parse_conjunction()

            if not to or not this:
                return None

            # Retrieve length of datatype and override to default if not specified
            if seq_get(to.expressions, 0) is None and to.this in self.VAR_LENGTH_DATATYPES:
                to = exp.DataType.build(to.this, expressions=[exp.Literal.number(30)], nested=False)

            # Check whether a conversion with format is applicable
            if self._match(TokenType.COMMA):
                format_val = self._parse_number()
                format_val_name = format_val.name if format_val else ""

                if format_val_name not in TSQL.convert_format_mapping:
                    raise ValueError(
                        f"CONVERT function at T-SQL does not support format style {format_val_name}"
                    )

                format_norm = exp.Literal.string(TSQL.convert_format_mapping[format_val_name])

                # Check whether the convert entails a string to date format
                if to.this == DataType.Type.DATE:
                    return self.expression(exp.StrToDate, this=this, format=format_norm)
                # Check whether the convert entails a string to datetime format
                elif to.this == DataType.Type.DATETIME:
                    return self.expression(exp.StrToTime, this=this, format=format_norm)
                # Check whether the convert entails a date to string format
                elif to.this in self.VAR_LENGTH_DATATYPES:
                    return self.expression(
                        exp.Cast if strict else exp.TryCast,
                        to=to,
                        this=self.expression(exp.TimeToStr, this=this, format=format_norm),
                    )
                elif to.this == DataType.Type.TEXT:
                    return self.expression(exp.TimeToStr, this=this, format=format_norm)

            # Entails a simple cast without any format requirement
            return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

        def _parse_user_defined_function(
            self, kind: t.Optional[TokenType] = None
        ) -> t.Optional[exp.Expression]:
            """Parse a user-defined function, allowing an unparenthesised parameter list.

            The base result is returned unchanged for functions, for results
            that are already ``exp.UserDefinedFunction``, or when the next token
            is an alias; otherwise a comma-separated parameter list is parsed.
            """
            this = super()._parse_user_defined_function(kind=kind)

            if (
                kind == TokenType.FUNCTION
                or isinstance(this, exp.UserDefinedFunction)
                or self._match(TokenType.ALIAS, advance=False)
            ):
                return this

            expressions = self._parse_csv(self._parse_function_parameter)
            return self.expression(exp.UserDefinedFunction, this=this, expressions=expressions)

    class Generator(generator.Generator):
        """SQL generator overrides for T-SQL."""

        LOCKING_READS_SUPPORTED = True

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,  # type: ignore
            exp.DataType.Type.INT: "INTEGER",
            exp.DataType.Type.DECIMAL: "NUMERIC",
            exp.DataType.Type.DATETIME: "DATETIME2",
            exp.DataType.Type.VARIANT: "SQL_VARIANT",
        }

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,  # type: ignore
            exp.DateAdd: generate_date_delta_with_unit_sql,
            exp.DateDiff: generate_date_delta_with_unit_sql,
            exp.CurrentDate: rename_func("GETDATE"),
            exp.CurrentTimestamp: rename_func("GETDATE"),
            exp.If: rename_func("IIF"),
            exp.NumberToStr: _format_sql,
            exp.TimeToStr: _format_sql,
            exp.GroupConcat: _string_agg_sql,
            exp.Min: min_or_least,
        }

        # Drop the inherited transform so returnsproperty_sql below is the
        # ReturnsProperty handler defined for this dialect.
        del TRANSFORMS[exp.ReturnsProperty]

        def systemtime_sql(self, expression: exp.SystemTime) -> str:
            """Render an ``exp.SystemTime`` node as a ``FOR SYSTEM_TIME`` clause."""
            prefix = "FOR SYSTEM_TIME"
            kind = expression.args["kind"]
            if kind == "ALL":
                return f"{prefix} ALL"

            lower = self.sql(expression, "this")
            if kind == "AS OF":
                return f"{prefix} AS OF {lower}"

            upper = self.sql(expression, "expression")
            if kind == "FROM":
                return f"{prefix} FROM {lower} TO {upper}"
            if kind == "BETWEEN":
                return f"{prefix} BETWEEN {lower} AND {upper}"

            # Any other kind is rendered as CONTAINED IN.
            return f"{prefix} CONTAINED IN ({lower}, {upper})"

        def returnsproperty_sql(self, expression: exp.ReturnsProperty) -> str:
            """Render ``RETURNS [table ]<type>`` for a ``ReturnsProperty`` node."""
            table_arg = expression.args.get("table")
            table_prefix = f"{table_arg} " if table_arg else ""
            return f"RETURNS {table_prefix}{self.sql(expression, 'this')}"
class Tokenizer(tokens.Tokenizer):
    """Tokenizer settings specific to T-SQL."""

    # Identifiers may be wrapped in double quotes or in square brackets.
    IDENTIFIERS = ['"', ("[", "]")]

    QUOTES = ["'", '"']

    KEYWORDS = {
        **tokens.Tokenizer.KEYWORDS,
        "DATETIME2": TokenType.DATETIME,
        "DATETIMEOFFSET": TokenType.TIMESTAMPTZ,
        "DECLARE": TokenType.COMMAND,
        "IMAGE": TokenType.IMAGE,
        "MONEY": TokenType.MONEY,
        "NTEXT": TokenType.TEXT,
        "NVARCHAR(MAX)": TokenType.TEXT,
        "PRINT": TokenType.COMMAND,
        "PROC": TokenType.PROCEDURE,
        "REAL": TokenType.FLOAT,
        "ROWVERSION": TokenType.ROWVERSION,
        "SMALLDATETIME": TokenType.DATETIME,
        "SMALLMONEY": TokenType.SMALLMONEY,
        "SQL_VARIANT": TokenType.VARIANT,
        "TIME": TokenType.TIMESTAMP,
        "TOP": TokenType.TOP,
        "UNIQUEIDENTIFIER": TokenType.UNIQUEIDENTIFIER,
        "VARCHAR(MAX)": TokenType.TEXT,
        "XML": TokenType.XML,
    }

    # TSQL allows @, # to appear as a variable/identifier prefix
    SINGLE_TOKENS = tokens.Tokenizer.SINGLE_TOKENS.copy()
    del SINGLE_TOKENS["@"]
    del SINGLE_TOKENS["#"]
Inherited Members
class Parser(parser.Parser):
    """Parser extensions for T-SQL functions, CONVERT, FOR SYSTEM_TIME and RETURNS."""

    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,  # type: ignore
        # CHARINDEX takes the substring first and the searched string second.
        "CHARINDEX": lambda args: exp.StrPosition(
            this=seq_get(args, 1),
            substr=seq_get(args, 0),
            position=seq_get(args, 2),
        ),
        "DATEADD": parse_date_delta(exp.DateAdd, unit_mapping=DATE_DELTA_INTERVAL),
        "DATEDIFF": parse_date_delta(exp.DateDiff, unit_mapping=DATE_DELTA_INTERVAL),
        "DATENAME": _format_time_lambda(exp.TimeToStr, full_format_mapping=True),
        "DATEPART": _format_time_lambda(exp.TimeToStr),
        "EOMONTH": _parse_eomonth,
        "FORMAT": _parse_format,
        "GETDATE": exp.CurrentTimestamp.from_arg_list,
        "IIF": exp.If.from_arg_list,
        "ISNULL": exp.Coalesce.from_arg_list,
        "JSON_VALUE": exp.JSONExtractScalar.from_arg_list,
        "LEN": exp.Length.from_arg_list,
        "REPLICATE": exp.Repeat.from_arg_list,
        "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
        "SYSDATETIME": exp.CurrentTimestamp.from_arg_list,
    }

    # Character types that get a default length in CONVERT when none is given.
    VAR_LENGTH_DATATYPES = {
        DataType.Type.NVARCHAR,
        DataType.Type.VARCHAR,
        DataType.Type.CHAR,
        DataType.Type.NCHAR,
    }

    # Tokens accepted as the table variable name in a RETURNS clause.
    RETURNS_TABLE_TOKENS = parser.Parser.ID_VAR_TOKENS - {  # type: ignore
        TokenType.TABLE,
        *parser.Parser.TYPE_TOKENS,  # type: ignore
    }

    STATEMENT_PARSERS = {
        **parser.Parser.STATEMENT_PARSERS,  # type: ignore
        TokenType.END: lambda self: self._parse_command(),
    }

    def _parse_system_time(self) -> t.Optional[exp.Expression]:
        """Parse an optional ``FOR SYSTEM_TIME ...`` clause.

        Returns:
            An ``exp.SystemTime`` node whose ``kind`` is one of ``"AS OF"``,
            ``"FROM"``, ``"BETWEEN"``, ``"CONTAINED IN"`` or ``"ALL"``, or
            ``None`` when the clause is absent. An unrecognised form is
            reported through ``raise_error``.
        """
        if not self._match_text_seq("FOR", "SYSTEM_TIME"):
            return None

        if self._match_text_seq("AS", "OF"):
            system_time = self.expression(
                exp.SystemTime, this=self._parse_bitwise(), kind="AS OF"
            )
        elif self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            # Upper-case the keyword so the generator's "FROM"/"BETWEEN"
            # comparisons match however the query spelled it.
            kind = self._prev.text.upper()
            this = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            expression = self._parse_bitwise()
            system_time = self.expression(
                exp.SystemTime, this=this, expression=expression, kind=kind
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            system_time = self.expression(
                exp.SystemTime,
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                kind="CONTAINED IN",
            )
        elif self._match(TokenType.ALL):
            system_time = self.expression(exp.SystemTime, kind="ALL")
        else:
            system_time = None
            self.raise_error("Unable to parse FOR SYSTEM_TIME clause")

        return system_time

    def _parse_table_parts(self, schema: bool = False) -> exp.Expression:
        """Parse table parts and attach any trailing ``FOR SYSTEM_TIME`` clause."""
        table = super()._parse_table_parts(schema=schema)
        table.set("system_time", self._parse_system_time())
        return table

    def _parse_returns(self) -> exp.Expression:
        """Parse ``RETURNS``, recording an optional leading table variable name."""
        table = self._parse_id_var(any_token=False, tokens=self.RETURNS_TABLE_TOKENS)
        returns = super()._parse_returns()
        returns.set("table", table)
        return returns

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        """Parse the arguments of ``CONVERT(type, value[, style])``.

        Args:
            strict: selects ``exp.Cast`` (True) or ``exp.TryCast`` (False).

        Returns:
            A string/date conversion expression when a style is given and the
            target type is handled, a plain cast otherwise, or ``None`` when
            the type or the value is missing.

        Raises:
            ValueError: if the style is not in ``TSQL.convert_format_mapping``.
        """
        to = self._parse_types()
        self._match(TokenType.COMMA)
        this = self._parse_conjunction()

        if not to or not this:
            return None

        # Retrieve length of datatype and override to default if not specified
        if seq_get(to.expressions, 0) is None and to.this in self.VAR_LENGTH_DATATYPES:
            to = exp.DataType.build(to.this, expressions=[exp.Literal.number(30)], nested=False)

        # Check whether a conversion with format is applicable
        if self._match(TokenType.COMMA):
            format_val = self._parse_number()
            format_val_name = format_val.name if format_val else ""

            if format_val_name not in TSQL.convert_format_mapping:
                raise ValueError(
                    f"CONVERT function at T-SQL does not support format style {format_val_name}"
                )

            format_norm = exp.Literal.string(TSQL.convert_format_mapping[format_val_name])

            # Check whether the convert entails a string to date format
            if to.this == DataType.Type.DATE:
                return self.expression(exp.StrToDate, this=this, format=format_norm)
            # Check whether the convert entails a string to datetime format
            elif to.this == DataType.Type.DATETIME:
                return self.expression(exp.StrToTime, this=this, format=format_norm)
            # Check whether the convert entails a date to string format
            elif to.this in self.VAR_LENGTH_DATATYPES:
                return self.expression(
                    exp.Cast if strict else exp.TryCast,
                    to=to,
                    this=self.expression(exp.TimeToStr, this=this, format=format_norm),
                )
            elif to.this == DataType.Type.TEXT:
                return self.expression(exp.TimeToStr, this=this, format=format_norm)

        # Entails a simple cast without any format requirement
        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a user-defined function, allowing an unparenthesised parameter list.

        The base result is returned unchanged for functions, for results that
        are already ``exp.UserDefinedFunction``, or when the next token is an
        alias; otherwise a comma-separated parameter list is parsed.
        """
        this = super()._parse_user_defined_function(kind=kind)

        if (
            kind == TokenType.FUNCTION
            or isinstance(this, exp.UserDefinedFunction)
            or self._match(TokenType.ALIAS, advance=False)
        ):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        return self.expression(exp.UserDefinedFunction, this=this, expressions=expressions)
Parser consumes a list of tokens produced by the sqlglot.tokens.Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: the desired error level. Default: ErrorLevel.RAISE
- error_message_context: determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 50.
- index_offset: Index offset for arrays, e.g. ARRAY[0] vs ARRAY[1] as the head of a list. Default: 0
- alias_post_tablesample: If the table alias comes after tablesample. Default: False
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
- null_ordering: Indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small"
Inherited Members
class Generator(generator.Generator):
    """SQL generator overrides for T-SQL."""

    LOCKING_READS_SUPPORTED = True

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,  # type: ignore
        exp.DataType.Type.INT: "INTEGER",
        exp.DataType.Type.DECIMAL: "NUMERIC",
        exp.DataType.Type.DATETIME: "DATETIME2",
        exp.DataType.Type.VARIANT: "SQL_VARIANT",
    }

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,  # type: ignore
        exp.DateAdd: generate_date_delta_with_unit_sql,
        exp.DateDiff: generate_date_delta_with_unit_sql,
        exp.CurrentDate: rename_func("GETDATE"),
        exp.CurrentTimestamp: rename_func("GETDATE"),
        exp.If: rename_func("IIF"),
        exp.NumberToStr: _format_sql,
        exp.TimeToStr: _format_sql,
        exp.GroupConcat: _string_agg_sql,
        exp.Min: min_or_least,
    }

    # Drop the inherited transform so returnsproperty_sql below is the
    # ReturnsProperty handler defined for this dialect.
    del TRANSFORMS[exp.ReturnsProperty]

    def systemtime_sql(self, expression: exp.SystemTime) -> str:
        """Render an ``exp.SystemTime`` node as a ``FOR SYSTEM_TIME`` clause."""
        prefix = "FOR SYSTEM_TIME"
        kind = expression.args["kind"]
        if kind == "ALL":
            return f"{prefix} ALL"

        lower = self.sql(expression, "this")
        if kind == "AS OF":
            return f"{prefix} AS OF {lower}"

        upper = self.sql(expression, "expression")
        if kind == "FROM":
            return f"{prefix} FROM {lower} TO {upper}"
        if kind == "BETWEEN":
            return f"{prefix} BETWEEN {lower} AND {upper}"

        # Any other kind is rendered as CONTAINED IN.
        return f"{prefix} CONTAINED IN ({lower}, {upper})"

    def returnsproperty_sql(self, expression: exp.ReturnsProperty) -> str:
        """Render ``RETURNS [table ]<type>`` for a ``ReturnsProperty`` node."""
        table_arg = expression.args.get("table")
        table_prefix = f"{table_arg} " if table_arg else ""
        return f"RETURNS {table_prefix}{self.sql(expression, 'this')}"
Generator interprets the given syntax tree and produces a SQL string as an output.
Arguments:
- time_mapping (dict): the dictionary of custom time mappings in which the key represents a Python time format and the value the target time format
- time_trie (trie): a trie of the time_mapping keys
- pretty (bool): if set to True the returned string will be formatted. Default: False.
- quote_start (str): specifies which starting character to use to delimit quotes. Default: '.
- quote_end (str): specifies which ending character to use to delimit quotes. Default: '.
- identifier_start (str): specifies which starting character to use to delimit identifiers. Default: ".
- identifier_end (str): specifies which ending character to use to delimit identifiers. Default: ".
- identify (bool | str): 'always': always quote; 'safe': quote identifiers if they don't contain an uppercase character; True defaults to 'always'.
- normalize (bool): if set to True all identifiers will be lowercased
- string_escape (str): specifies a string escape character. Default: '.
- identifier_escape (str): specifies an identifier escape character. Default: ".
- pad (int): determines padding in a formatted string. Default: 2.
- indent (int): determines the size of indentation in a formatted string. Default: 4.
- unnest_column_only (bool): if true unnest table aliases are considered only as column aliases
- normalize_functions (str): normalize function names, "upper", "lower", or None Default: "upper"
- alias_post_tablesample (bool): if the table alias comes after tablesample Default: False
- unsupported_level (ErrorLevel): determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- null_ordering (str): Indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small"
- max_unsupported (int): Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma (bool): if the comma is leading or trailing in select statements. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether or not to preserve comments in the output SQL code. Default: True
def systemtime_sql(self, expression: exp.SystemTime) -> str:
    """Render a ``FOR SYSTEM_TIME`` clause.

    Args:
        expression: node whose ``kind`` arg selects the clause form and
            whose ``this`` / ``expression`` args supply the operands.

    Returns:
        The clause text. Any ``kind`` other than "ALL", "AS OF", "FROM"
        or "BETWEEN" is rendered as ``CONTAINED IN (start, end)``.

    Raises:
        KeyError: if ``expression.args`` has no ``kind`` entry.
    """
    kind = expression.args["kind"]
    if kind == "ALL":
        return "FOR SYSTEM_TIME ALL"

    # Operands are rendered lazily: only the forms that use them ask for them.
    lower = self.sql(expression, "this")
    if kind == "AS OF":
        return f"FOR SYSTEM_TIME AS OF {lower}"

    upper = self.sql(expression, "expression")
    if kind == "FROM":
        clause = f"FROM {lower} TO {upper}"
    elif kind == "BETWEEN":
        clause = f"BETWEEN {lower} AND {upper}"
    else:
        clause = f"CONTAINED IN ({lower}, {upper})"
    return f"FOR SYSTEM_TIME {clause}"
Inherited Members
- sqlglot.generator.Generator
- Generator
- generate
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columndef_sql
- columnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- create_sql
- describe_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- except_op
- fetch_sql
- filter_sql
- hint_sql
- index_sql
- identifier_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- afterjournalproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- insert_sql
- intersect_sql
- intersect_op
- introducer_sql
- pseudotype_sql
- returning_sql
- rowformatdelimitedproperty_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- join_sql
- lambda_sql
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- lock_sql
- literal_sql
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognize_sql
- query_modifiers
- select_sql
- schema_sql
- star_sql
- structkwarg_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- union_sql
- union_op
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- window_spec_sql
- withingroup_sql
- between_sql
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- extract_sql
- trim_sql
- concat_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- unique_sql
- if_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- aliases_sql
- attimezone_sql
- add_sql
- and_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- renametable_sql
- altertable_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- is_sql
- like_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- or_sql
- slice_sql
- sub_sql
- trycast_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- set_operation
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql