sqlglot.dialects.dialect

   1from __future__ import annotations
   2
   3import logging
   4import typing as t
   5from enum import Enum, auto
   6from functools import reduce
   7
   8from sqlglot import exp
   9from sqlglot.errors import ParseError
  10from sqlglot.generator import Generator
  11from sqlglot.helper import AutoName, flatten, is_int, seq_get, subclasses
  12from sqlglot.jsonpath import JSONPathTokenizer, parse as parse_json_path
  13from sqlglot.parser import Parser
  14from sqlglot.time import TIMEZONES, format_time
  15from sqlglot.tokens import Token, Tokenizer, TokenType
  16from sqlglot.trie import new_trie
  17
  18DATE_ADD_OR_DIFF = t.Union[exp.DateAdd, exp.TsOrDsAdd, exp.DateDiff, exp.TsOrDsDiff]
  19DATE_ADD_OR_SUB = t.Union[exp.DateAdd, exp.TsOrDsAdd, exp.DateSub]
  20JSON_EXTRACT_TYPE = t.Union[exp.JSONExtract, exp.JSONExtractScalar]
  21
  22
  23if t.TYPE_CHECKING:
  24    from sqlglot._typing import B, E, F
  25
  26    from sqlglot.optimizer.annotate_types import TypeAnnotator
  27
  28    AnnotatorsType = t.Dict[t.Type[E], t.Callable[[TypeAnnotator, E], E]]
  29
  30logger = logging.getLogger("sqlglot")
  31
  32UNESCAPED_SEQUENCES = {
  33    "\\a": "\a",
  34    "\\b": "\b",
  35    "\\f": "\f",
  36    "\\n": "\n",
  37    "\\r": "\r",
  38    "\\t": "\t",
  39    "\\v": "\v",
  40    "\\\\": "\\",
  41}
  42
  43
  44def _annotate_with_type_lambda(data_type: exp.DataType.Type) -> t.Callable[[TypeAnnotator, E], E]:
  45    return lambda self, e: self._annotate_with_type(e, data_type)
  46
  47
  48class Dialects(str, Enum):
  49    """Dialects supported by SQLGLot."""
  50
  51    DIALECT = ""
  52
  53    ATHENA = "athena"
  54    BIGQUERY = "bigquery"
  55    CLICKHOUSE = "clickhouse"
  56    DATABRICKS = "databricks"
  57    DORIS = "doris"
  58    DRILL = "drill"
  59    DUCKDB = "duckdb"
  60    HIVE = "hive"
  61    MATERIALIZE = "materialize"
  62    MYSQL = "mysql"
  63    ORACLE = "oracle"
  64    POSTGRES = "postgres"
  65    PRESTO = "presto"
  66    PRQL = "prql"
  67    REDSHIFT = "redshift"
  68    RISINGWAVE = "risingwave"
  69    SNOWFLAKE = "snowflake"
  70    SPARK = "spark"
  71    SPARK2 = "spark2"
  72    SQLITE = "sqlite"
  73    STARROCKS = "starrocks"
  74    TABLEAU = "tableau"
  75    TERADATA = "teradata"
  76    TRINO = "trino"
  77    TSQL = "tsql"
  78
  79
  80class NormalizationStrategy(str, AutoName):
  81    """Specifies the strategy according to which identifiers should be normalized."""
  82
  83    LOWERCASE = auto()
  84    """Unquoted identifiers are lowercased."""
  85
  86    UPPERCASE = auto()
  87    """Unquoted identifiers are uppercased."""
  88
  89    CASE_SENSITIVE = auto()
  90    """Always case-sensitive, regardless of quotes."""
  91
  92    CASE_INSENSITIVE = auto()
  93    """Always case-insensitive, regardless of quotes."""
  94
  95
  96class _Dialect(type):
  97    classes: t.Dict[str, t.Type[Dialect]] = {}
  98
  99    def __eq__(cls, other: t.Any) -> bool:
 100        if cls is other:
 101            return True
 102        if isinstance(other, str):
 103            return cls is cls.get(other)
 104        if isinstance(other, Dialect):
 105            return cls is type(other)
 106
 107        return False
 108
 109    def __hash__(cls) -> int:
 110        return hash(cls.__name__.lower())
 111
 112    @classmethod
 113    def __getitem__(cls, key: str) -> t.Type[Dialect]:
 114        return cls.classes[key]
 115
 116    @classmethod
 117    def get(
 118        cls, key: str, default: t.Optional[t.Type[Dialect]] = None
 119    ) -> t.Optional[t.Type[Dialect]]:
 120        return cls.classes.get(key, default)
 121
 122    def __new__(cls, clsname, bases, attrs):
 123        klass = super().__new__(cls, clsname, bases, attrs)
 124        enum = Dialects.__members__.get(clsname.upper())
 125        cls.classes[enum.value if enum is not None else clsname.lower()] = klass
 126
 127        klass.TIME_TRIE = new_trie(klass.TIME_MAPPING)
 128        klass.FORMAT_TRIE = (
 129            new_trie(klass.FORMAT_MAPPING) if klass.FORMAT_MAPPING else klass.TIME_TRIE
 130        )
 131        klass.INVERSE_TIME_MAPPING = {v: k for k, v in klass.TIME_MAPPING.items()}
 132        klass.INVERSE_TIME_TRIE = new_trie(klass.INVERSE_TIME_MAPPING)
 133        klass.INVERSE_FORMAT_MAPPING = {v: k for k, v in klass.FORMAT_MAPPING.items()}
 134        klass.INVERSE_FORMAT_TRIE = new_trie(klass.INVERSE_FORMAT_MAPPING)
 135
 136        klass.INVERSE_CREATABLE_KIND_MAPPING = {
 137            v: k for k, v in klass.CREATABLE_KIND_MAPPING.items()
 138        }
 139
 140        base = seq_get(bases, 0)
 141        base_tokenizer = (getattr(base, "tokenizer_class", Tokenizer),)
 142        base_jsonpath_tokenizer = (getattr(base, "jsonpath_tokenizer_class", JSONPathTokenizer),)
 143        base_parser = (getattr(base, "parser_class", Parser),)
 144        base_generator = (getattr(base, "generator_class", Generator),)
 145
 146        klass.tokenizer_class = klass.__dict__.get(
 147            "Tokenizer", type("Tokenizer", base_tokenizer, {})
 148        )
 149        klass.jsonpath_tokenizer_class = klass.__dict__.get(
 150            "JSONPathTokenizer", type("JSONPathTokenizer", base_jsonpath_tokenizer, {})
 151        )
 152        klass.parser_class = klass.__dict__.get("Parser", type("Parser", base_parser, {}))
 153        klass.generator_class = klass.__dict__.get(
 154            "Generator", type("Generator", base_generator, {})
 155        )
 156
 157        klass.QUOTE_START, klass.QUOTE_END = list(klass.tokenizer_class._QUOTES.items())[0]
 158        klass.IDENTIFIER_START, klass.IDENTIFIER_END = list(
 159            klass.tokenizer_class._IDENTIFIERS.items()
 160        )[0]
 161
 162        def get_start_end(token_type: TokenType) -> t.Tuple[t.Optional[str], t.Optional[str]]:
 163            return next(
 164                (
 165                    (s, e)
 166                    for s, (e, t) in klass.tokenizer_class._FORMAT_STRINGS.items()
 167                    if t == token_type
 168                ),
 169                (None, None),
 170            )
 171
 172        klass.BIT_START, klass.BIT_END = get_start_end(TokenType.BIT_STRING)
 173        klass.HEX_START, klass.HEX_END = get_start_end(TokenType.HEX_STRING)
 174        klass.BYTE_START, klass.BYTE_END = get_start_end(TokenType.BYTE_STRING)
 175        klass.UNICODE_START, klass.UNICODE_END = get_start_end(TokenType.UNICODE_STRING)
 176
 177        if "\\" in klass.tokenizer_class.STRING_ESCAPES:
 178            klass.UNESCAPED_SEQUENCES = {
 179                **UNESCAPED_SEQUENCES,
 180                **klass.UNESCAPED_SEQUENCES,
 181            }
 182
 183        klass.ESCAPED_SEQUENCES = {v: k for k, v in klass.UNESCAPED_SEQUENCES.items()}
 184
 185        klass.SUPPORTS_COLUMN_JOIN_MARKS = "(+)" in klass.tokenizer_class.KEYWORDS
 186
 187        if enum not in ("", "bigquery"):
 188            klass.generator_class.SELECT_KINDS = ()
 189
 190        if enum not in ("", "clickhouse"):
 191            klass.generator_class.SUPPORTS_NULLABLE_TYPES = False
 192
 193        if enum not in ("", "athena", "presto", "trino"):
 194            klass.generator_class.TRY_SUPPORTED = False
 195            klass.generator_class.SUPPORTS_UESCAPE = False
 196
 197        if enum not in ("", "databricks", "hive", "spark", "spark2"):
 198            modifier_transforms = klass.generator_class.AFTER_HAVING_MODIFIER_TRANSFORMS.copy()
 199            for modifier in ("cluster", "distribute", "sort"):
 200                modifier_transforms.pop(modifier, None)
 201
 202            klass.generator_class.AFTER_HAVING_MODIFIER_TRANSFORMS = modifier_transforms
 203
 204        if enum not in ("", "doris", "mysql"):
 205            klass.parser_class.ID_VAR_TOKENS = klass.parser_class.ID_VAR_TOKENS | {
 206                TokenType.STRAIGHT_JOIN,
 207            }
 208            klass.parser_class.TABLE_ALIAS_TOKENS = klass.parser_class.TABLE_ALIAS_TOKENS | {
 209                TokenType.STRAIGHT_JOIN,
 210            }
 211
 212        if not klass.SUPPORTS_SEMI_ANTI_JOIN:
 213            klass.parser_class.TABLE_ALIAS_TOKENS = klass.parser_class.TABLE_ALIAS_TOKENS | {
 214                TokenType.ANTI,
 215                TokenType.SEMI,
 216            }
 217
 218        return klass
 219
 220
 221class Dialect(metaclass=_Dialect):
 222    INDEX_OFFSET = 0
 223    """The base index offset for arrays."""
 224
 225    WEEK_OFFSET = 0
 226    """First day of the week in DATE_TRUNC(week). Defaults to 0 (Monday). -1 would be Sunday."""
 227
 228    UNNEST_COLUMN_ONLY = False
 229    """Whether `UNNEST` table aliases are treated as column aliases."""
 230
 231    ALIAS_POST_TABLESAMPLE = False
 232    """Whether the table alias comes after tablesample."""
 233
 234    TABLESAMPLE_SIZE_IS_PERCENT = False
 235    """Whether a size in the table sample clause represents percentage."""
 236
 237    NORMALIZATION_STRATEGY = NormalizationStrategy.LOWERCASE
 238    """Specifies the strategy according to which identifiers should be normalized."""
 239
 240    IDENTIFIERS_CAN_START_WITH_DIGIT = False
 241    """Whether an unquoted identifier can start with a digit."""
 242
 243    DPIPE_IS_STRING_CONCAT = True
 244    """Whether the DPIPE token (`||`) is a string concatenation operator."""
 245
 246    STRICT_STRING_CONCAT = False
 247    """Whether `CONCAT`'s arguments must be strings."""
 248
 249    SUPPORTS_USER_DEFINED_TYPES = True
 250    """Whether user-defined data types are supported."""
 251
 252    SUPPORTS_SEMI_ANTI_JOIN = True
 253    """Whether `SEMI` or `ANTI` joins are supported."""
 254
 255    SUPPORTS_COLUMN_JOIN_MARKS = False
 256    """Whether the old-style outer join (+) syntax is supported."""
 257
 258    COPY_PARAMS_ARE_CSV = True
 259    """Separator of COPY statement parameters."""
 260
 261    NORMALIZE_FUNCTIONS: bool | str = "upper"
 262    """
 263    Determines how function names are going to be normalized.
 264    Possible values:
 265        "upper" or True: Convert names to uppercase.
 266        "lower": Convert names to lowercase.
 267        False: Disables function name normalization.
 268    """
 269
 270    LOG_BASE_FIRST: t.Optional[bool] = True
 271    """
 272    Whether the base comes first in the `LOG` function.
 273    Possible values: `True`, `False`, `None` (two arguments are not supported by `LOG`)
 274    """
 275
 276    NULL_ORDERING = "nulls_are_small"
 277    """
 278    Default `NULL` ordering method to use if not explicitly set.
 279    Possible values: `"nulls_are_small"`, `"nulls_are_large"`, `"nulls_are_last"`
 280    """
 281
 282    TYPED_DIVISION = False
 283    """
 284    Whether the behavior of `a / b` depends on the types of `a` and `b`.
 285    False means `a / b` is always float division.
 286    True means `a / b` is integer division if both `a` and `b` are integers.
 287    """
 288
 289    SAFE_DIVISION = False
 290    """Whether division by zero throws an error (`False`) or returns NULL (`True`)."""
 291
 292    CONCAT_COALESCE = False
 293    """A `NULL` arg in `CONCAT` yields `NULL` by default, but in some dialects it yields an empty string."""
 294
 295    HEX_LOWERCASE = False
 296    """Whether the `HEX` function returns a lowercase hexadecimal string."""
 297
 298    DATE_FORMAT = "'%Y-%m-%d'"
 299    DATEINT_FORMAT = "'%Y%m%d'"
 300    TIME_FORMAT = "'%Y-%m-%d %H:%M:%S'"
 301
 302    TIME_MAPPING: t.Dict[str, str] = {}
 303    """Associates this dialect's time formats with their equivalent Python `strftime` formats."""
 304
 305    # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_model_rules_date_time
 306    # https://docs.teradata.com/r/Teradata-Database-SQL-Functions-Operators-Expressions-and-Predicates/March-2017/Data-Type-Conversions/Character-to-DATE-Conversion/Forcing-a-FORMAT-on-CAST-for-Converting-Character-to-DATE
 307    FORMAT_MAPPING: t.Dict[str, str] = {}
 308    """
 309    Helper which is used for parsing the special syntax `CAST(x AS DATE FORMAT 'yyyy')`.
 310    If empty, the corresponding trie will be constructed off of `TIME_MAPPING`.
 311    """
 312
 313    UNESCAPED_SEQUENCES: t.Dict[str, str] = {}
 314    """Mapping of an escaped sequence (`\\n`) to its unescaped version (`\n`)."""
 315
 316    PSEUDOCOLUMNS: t.Set[str] = set()
 317    """
 318    Columns that are auto-generated by the engine corresponding to this dialect.
 319    For example, such columns may be excluded from `SELECT *` queries.
 320    """
 321
 322    PREFER_CTE_ALIAS_COLUMN = False
 323    """
 324    Some dialects, such as Snowflake, allow you to reference a CTE column alias in the
 325    HAVING clause of the CTE. This flag will cause the CTE alias columns to override
 326    any projection aliases in the subquery.
 327
 328    For example,
 329        WITH y(c) AS (
 330            SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0
 331        ) SELECT c FROM y;
 332
 333        will be rewritten as
 334
 335        WITH y(c) AS (
 336            SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
 337        ) SELECT c FROM y;
 338    """
 344
 345    FORCE_EARLY_ALIAS_REF_EXPANSION = False
 346    """
 347    Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).
 348
 349    For example:
 350        WITH data AS (
 351        SELECT
 352            1 AS id,
 353            2 AS my_id
 354        )
 355        SELECT
 356            id AS my_id
 357        FROM
 358            data
 359        WHERE
 360            my_id = 1
 361        GROUP BY
 362            my_id
 363        HAVING
 364            my_id = 1
 365
 366    In most dialects "my_id" would refer to "data.my_id" (which is done in _qualify_columns()) across the query, except:
 367        - BigQuery, which will forward the alias to GROUP BY + HAVING clauses, i.e. it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1"
 368        - Clickhouse, which will forward the alias across the query, i.e. it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
 369    """
 370
 371    EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY = False
 372    """Whether alias reference expansion before qualification should only happen for the GROUP BY clause."""
 373
 374    SUPPORTS_ORDER_BY_ALL = False
 375    """
 376    Whether ORDER BY ALL is supported (expands to all the selected columns), as in DuckDB and Spark3/Databricks.
 377    """
 378
 379    HAS_DISTINCT_ARRAY_CONSTRUCTORS = False
 380    """
 381    Whether the ARRAY constructor is context-sensitive, i.e. in Redshift ARRAY[1, 2, 3] != ARRAY(1, 2, 3)
 382    as the former is of type INT[] vs the latter which is SUPER
 383    """
 384
 385    SUPPORTS_FIXED_SIZE_ARRAYS = False
 386    """
 387    Whether expressions such as x::INT[5] should be parsed as fixed-size array defs/casts, e.g. in DuckDB. In
 388    dialects which don't support fixed-size arrays, such as Snowflake, this should be interpreted as a subscript/index operator.
 389    """
 390
 391    CREATABLE_KIND_MAPPING: dict[str, str] = {}
 392    """
 393    Helper for dialects that use a different name for the same creatable kind. For example, the Clickhouse
 394    equivalent of CREATE SCHEMA is CREATE DATABASE.
 395    """
 396
 397    # --- Autofilled ---
 398
 399    tokenizer_class = Tokenizer
 400    jsonpath_tokenizer_class = JSONPathTokenizer
 401    parser_class = Parser
 402    generator_class = Generator
 403
 404    # A trie of the time_mapping keys
 405    TIME_TRIE: t.Dict = {}
 406    FORMAT_TRIE: t.Dict = {}
 407
 408    INVERSE_TIME_MAPPING: t.Dict[str, str] = {}
 409    INVERSE_TIME_TRIE: t.Dict = {}
 410    INVERSE_FORMAT_MAPPING: t.Dict[str, str] = {}
 411    INVERSE_FORMAT_TRIE: t.Dict = {}
 412
 413    INVERSE_CREATABLE_KIND_MAPPING: dict[str, str] = {}
 414
 415    ESCAPED_SEQUENCES: t.Dict[str, str] = {}
 416
 417    # Delimiters for string literals and identifiers
 418    QUOTE_START = "'"
 419    QUOTE_END = "'"
 420    IDENTIFIER_START = '"'
 421    IDENTIFIER_END = '"'
 422
 423    # Delimiters for bit, hex, byte and unicode literals
 424    BIT_START: t.Optional[str] = None
 425    BIT_END: t.Optional[str] = None
 426    HEX_START: t.Optional[str] = None
 427    HEX_END: t.Optional[str] = None
 428    BYTE_START: t.Optional[str] = None
 429    BYTE_END: t.Optional[str] = None
 430    UNICODE_START: t.Optional[str] = None
 431    UNICODE_END: t.Optional[str] = None
 432
 433    DATE_PART_MAPPING = {
 434        "Y": "YEAR",
 435        "YY": "YEAR",
 436        "YYY": "YEAR",
 437        "YYYY": "YEAR",
 438        "YR": "YEAR",
 439        "YEARS": "YEAR",
 440        "YRS": "YEAR",
 441        "MM": "MONTH",
 442        "MON": "MONTH",
 443        "MONS": "MONTH",
 444        "MONTHS": "MONTH",
 445        "D": "DAY",
 446        "DD": "DAY",
 447        "DAYS": "DAY",
 448        "DAYOFMONTH": "DAY",
 449        "DAY OF WEEK": "DAYOFWEEK",
 450        "WEEKDAY": "DAYOFWEEK",
 451        "DOW": "DAYOFWEEK",
 452        "DW": "DAYOFWEEK",
 453        "WEEKDAY_ISO": "DAYOFWEEKISO",
 454        "DOW_ISO": "DAYOFWEEKISO",
 455        "DW_ISO": "DAYOFWEEKISO",
 456        "DAY OF YEAR": "DAYOFYEAR",
 457        "DOY": "DAYOFYEAR",
 458        "DY": "DAYOFYEAR",
 459        "W": "WEEK",
 460        "WK": "WEEK",
 461        "WEEKOFYEAR": "WEEK",
 462        "WOY": "WEEK",
 463        "WY": "WEEK",
 464        "WEEK_ISO": "WEEKISO",
 465        "WEEKOFYEARISO": "WEEKISO",
 466        "WEEKOFYEAR_ISO": "WEEKISO",
 467        "Q": "QUARTER",
 468        "QTR": "QUARTER",
 469        "QTRS": "QUARTER",
 470        "QUARTERS": "QUARTER",
 471        "H": "HOUR",
 472        "HH": "HOUR",
 473        "HR": "HOUR",
 474        "HOURS": "HOUR",
 475        "HRS": "HOUR",
 476        "M": "MINUTE",
 477        "MI": "MINUTE",
 478        "MIN": "MINUTE",
 479        "MINUTES": "MINUTE",
 480        "MINS": "MINUTE",
 481        "S": "SECOND",
 482        "SEC": "SECOND",
 483        "SECONDS": "SECOND",
 484        "SECS": "SECOND",
 485        "MS": "MILLISECOND",
 486        "MSEC": "MILLISECOND",
 487        "MSECS": "MILLISECOND",
 488        "MSECOND": "MILLISECOND",
 489        "MSECONDS": "MILLISECOND",
 490        "MILLISEC": "MILLISECOND",
 491        "MILLISECS": "MILLISECOND",
 492        "MILLISECON": "MILLISECOND",
 493        "MILLISECONDS": "MILLISECOND",
 494        "US": "MICROSECOND",
 495        "USEC": "MICROSECOND",
 496        "USECS": "MICROSECOND",
 497        "MICROSEC": "MICROSECOND",
 498        "MICROSECS": "MICROSECOND",
 499        "USECOND": "MICROSECOND",
 500        "USECONDS": "MICROSECOND",
 501        "MICROSECONDS": "MICROSECOND",
 502        "NS": "NANOSECOND",
 503        "NSEC": "NANOSECOND",
 504        "NANOSEC": "NANOSECOND",
 505        "NSECOND": "NANOSECOND",
 506        "NSECONDS": "NANOSECOND",
 507        "NANOSECS": "NANOSECOND",
 508        "EPOCH_SECOND": "EPOCH",
 509        "EPOCH_SECONDS": "EPOCH",
 510        "EPOCH_MILLISECONDS": "EPOCH_MILLISECOND",
 511        "EPOCH_MICROSECONDS": "EPOCH_MICROSECOND",
 512        "EPOCH_NANOSECONDS": "EPOCH_NANOSECOND",
 513        "TZH": "TIMEZONE_HOUR",
 514        "TZM": "TIMEZONE_MINUTE",
 515        "DEC": "DECADE",
 516        "DECS": "DECADE",
 517        "DECADES": "DECADE",
 518        "MIL": "MILLENIUM",
 519        "MILS": "MILLENIUM",
 520        "MILLENIA": "MILLENIUM",
 521        "C": "CENTURY",
 522        "CENT": "CENTURY",
 523        "CENTS": "CENTURY",
 524        "CENTURIES": "CENTURY",
 525    }
 526
 527    TYPE_TO_EXPRESSIONS: t.Dict[exp.DataType.Type, t.Set[t.Type[exp.Expression]]] = {
 528        exp.DataType.Type.BIGINT: {
 529            exp.ApproxDistinct,
 530            exp.ArraySize,
 531            exp.Count,
 532            exp.Length,
 533        },
 534        exp.DataType.Type.BOOLEAN: {
 535            exp.Between,
 536            exp.Boolean,
 537            exp.In,
 538            exp.RegexpLike,
 539        },
 540        exp.DataType.Type.DATE: {
 541            exp.CurrentDate,
 542            exp.Date,
 543            exp.DateFromParts,
 544            exp.DateStrToDate,
 545            exp.DiToDate,
 546            exp.StrToDate,
 547            exp.TimeStrToDate,
 548            exp.TsOrDsToDate,
 549        },
 550        exp.DataType.Type.DATETIME: {
 551            exp.CurrentDatetime,
 552            exp.Datetime,
 553            exp.DatetimeAdd,
 554            exp.DatetimeSub,
 555        },
 556        exp.DataType.Type.DOUBLE: {
 557            exp.ApproxQuantile,
 558            exp.Avg,
 559            exp.Div,
 560            exp.Exp,
 561            exp.Ln,
 562            exp.Log,
 563            exp.Pow,
 564            exp.Quantile,
 565            exp.Round,
 566            exp.SafeDivide,
 567            exp.Sqrt,
 568            exp.Stddev,
 569            exp.StddevPop,
 570            exp.StddevSamp,
 571            exp.Variance,
 572            exp.VariancePop,
 573        },
 574        exp.DataType.Type.INT: {
 575            exp.Ceil,
 576            exp.DatetimeDiff,
 577            exp.DateDiff,
 578            exp.TimestampDiff,
 579            exp.TimeDiff,
 580            exp.DateToDi,
 581            exp.Levenshtein,
 582            exp.Sign,
 583            exp.StrPosition,
 584            exp.TsOrDiToDi,
 585        },
 586        exp.DataType.Type.JSON: {
 587            exp.ParseJSON,
 588        },
 589        exp.DataType.Type.TIME: {
 590            exp.Time,
 591        },
 592        exp.DataType.Type.TIMESTAMP: {
 593            exp.CurrentTime,
 594            exp.CurrentTimestamp,
 595            exp.StrToTime,
 596            exp.TimeAdd,
 597            exp.TimeStrToTime,
 598            exp.TimeSub,
 599            exp.TimestampAdd,
 600            exp.TimestampSub,
 601            exp.UnixToTime,
 602        },
 603        exp.DataType.Type.TINYINT: {
 604            exp.Day,
 605            exp.Month,
 606            exp.Week,
 607            exp.Year,
 608            exp.Quarter,
 609        },
 610        exp.DataType.Type.VARCHAR: {
 611            exp.ArrayConcat,
 612            exp.Concat,
 613            exp.ConcatWs,
 614            exp.DateToDateStr,
 615            exp.GroupConcat,
 616            exp.Initcap,
 617            exp.Lower,
 618            exp.Substring,
 619            exp.TimeToStr,
 620            exp.TimeToTimeStr,
 621            exp.Trim,
 622            exp.TsOrDsToDateStr,
 623            exp.UnixToStr,
 624            exp.UnixToTimeStr,
 625            exp.Upper,
 626        },
 627    }
 628
 629    ANNOTATORS: AnnotatorsType = {
 630        **{
 631            expr_type: lambda self, e: self._annotate_unary(e)
 632            for expr_type in subclasses(exp.__name__, (exp.Unary, exp.Alias))
 633        },
 634        **{
 635            expr_type: lambda self, e: self._annotate_binary(e)
 636            for expr_type in subclasses(exp.__name__, exp.Binary)
 637        },
 638        **{
 639            expr_type: _annotate_with_type_lambda(data_type)
 640            for data_type, expressions in TYPE_TO_EXPRESSIONS.items()
 641            for expr_type in expressions
 642        },
 643        exp.Abs: lambda self, e: self._annotate_by_args(e, "this"),
 644        exp.Anonymous: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.UNKNOWN),
 645        exp.Array: lambda self, e: self._annotate_by_args(e, "expressions", array=True),
 646        exp.ArrayAgg: lambda self, e: self._annotate_by_args(e, "this", array=True),
 647        exp.ArrayConcat: lambda self, e: self._annotate_by_args(e, "this", "expressions"),
 648        exp.Bracket: lambda self, e: self._annotate_bracket(e),
 649        exp.Cast: lambda self, e: self._annotate_with_type(e, e.args["to"]),
 650        exp.Case: lambda self, e: self._annotate_by_args(e, "default", "ifs"),
 651        exp.Coalesce: lambda self, e: self._annotate_by_args(e, "this", "expressions"),
 652        exp.DataType: lambda self, e: self._annotate_with_type(e, e.copy()),
 653        exp.DateAdd: lambda self, e: self._annotate_timeunit(e),
 654        exp.DateSub: lambda self, e: self._annotate_timeunit(e),
 655        exp.DateTrunc: lambda self, e: self._annotate_timeunit(e),
 656        exp.Distinct: lambda self, e: self._annotate_by_args(e, "expressions"),
 657        exp.Div: lambda self, e: self._annotate_div(e),
 658        exp.Dot: lambda self, e: self._annotate_dot(e),
 659        exp.Explode: lambda self, e: self._annotate_explode(e),
 660        exp.Extract: lambda self, e: self._annotate_extract(e),
 661        exp.Filter: lambda self, e: self._annotate_by_args(e, "this"),
 662        exp.GenerateDateArray: lambda self, e: self._annotate_with_type(
 663            e, exp.DataType.build("ARRAY<DATE>")
 664        ),
 665        exp.GenerateTimestampArray: lambda self, e: self._annotate_with_type(
 666            e, exp.DataType.build("ARRAY<TIMESTAMP>")
 667        ),
 668        exp.If: lambda self, e: self._annotate_by_args(e, "true", "false"),
 669        exp.Interval: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.INTERVAL),
 670        exp.Least: lambda self, e: self._annotate_by_args(e, "expressions"),
 671        exp.Literal: lambda self, e: self._annotate_literal(e),
 672        exp.Map: lambda self, e: self._annotate_map(e),
 673        exp.Max: lambda self, e: self._annotate_by_args(e, "this", "expressions"),
 674        exp.Min: lambda self, e: self._annotate_by_args(e, "this", "expressions"),
 675        exp.Null: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.NULL),
 676        exp.Nullif: lambda self, e: self._annotate_by_args(e, "this", "expression"),
 677        exp.PropertyEQ: lambda self, e: self._annotate_by_args(e, "expression"),
 678        exp.Slice: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.UNKNOWN),
 679        exp.Struct: lambda self, e: self._annotate_struct(e),
 680        exp.Sum: lambda self, e: self._annotate_by_args(e, "this", "expressions", promote=True),
 681        exp.Timestamp: lambda self, e: self._annotate_with_type(
 682            e,
 683            exp.DataType.Type.TIMESTAMPTZ if e.args.get("with_tz") else exp.DataType.Type.TIMESTAMP,
 684        ),
 685        exp.ToMap: lambda self, e: self._annotate_to_map(e),
 686        exp.TryCast: lambda self, e: self._annotate_with_type(e, e.args["to"]),
 687        exp.Unnest: lambda self, e: self._annotate_unnest(e),
 688        exp.VarMap: lambda self, e: self._annotate_map(e),
 689    }
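        # These annotators drive type inference in the optimizer. A minimal
        # sketch of the observable effect (illustrative):
        #
        #     >>> import sqlglot
        #     >>> from sqlglot.optimizer.annotate_types import annotate_types
        #     >>> annotate_types(sqlglot.parse_one("SELECT 1 + 1").selects[0]).type.sql()
        #     'INT'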
 690
 691    @classmethod
 692    def get_or_raise(cls, dialect: DialectType) -> Dialect:
 693        """
 694        Look up a dialect in the global dialect registry and return it if it exists.
 695
 696        Args:
 697            dialect: The target dialect. If this is a string, it can be optionally followed by
 698                additional key-value pairs that are separated by commas and are used to specify
 699                dialect settings, such as whether the dialect's identifiers are case-sensitive.
 700
 701        Example:
 702        >>> dialect = Dialect.get_or_raise("duckdb")
 703        >>> dialect = Dialect.get_or_raise("mysql, normalization_strategy = case_sensitive")
 704
 705        Returns:
 706            The corresponding Dialect instance.
 707        """
 708
 709        if not dialect:
 710            return cls()
 711        if isinstance(dialect, _Dialect):
 712            return dialect()
 713        if isinstance(dialect, Dialect):
 714            return dialect
 715        if isinstance(dialect, str):
 716            try:
 717                dialect_name, *kv_strings = dialect.split(",")
 718                kv_pairs = (kv.split("=") for kv in kv_strings)
 719                kwargs = {}
 720                for pair in kv_pairs:
 721                    key = pair[0].strip()
 722                    value: t.Union[bool, str, None] = None
 723
 724                    if len(pair) == 1:
 725                        # Default initialize standalone settings to True
 726                        value = True
 727                    elif len(pair) == 2:
 728                        value = pair[1].strip()
 729
 730                        # Coerce the value to boolean if it matches one of the truthy/falsy values below
 731                        value_lower = value.lower()
 732                        if value_lower in ("true", "1"):
 733                            value = True
 734                        elif value_lower in ("false", "0"):
 735                            value = False
 736
 737                    kwargs[key] = value
 738
 739            except ValueError:
 740                raise ValueError(
 741                    f"Invalid dialect format: '{dialect}'. "
 742                    "Please use the correct format: 'dialect [, k1 = v2 [, ...]]'."
 743                )
 744
 745            result = cls.get(dialect_name.strip())
 746            if not result:
 747                from difflib import get_close_matches
 748
 749                similar = seq_get(get_close_matches(dialect_name, cls.classes, n=1), 0) or ""
 750                if similar:
 751                    similar = f" Did you mean {similar}?"
 752
 753                raise ValueError(f"Unknown dialect '{dialect_name}'.{similar}")
 754
 755            return result(**kwargs)
 756
 757        raise ValueError(f"Invalid dialect type for '{dialect}': '{type(dialect)}'.")
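        # A sketch of the settings parsing above (illustrative): the string
        # "mysql, normalization_strategy = case_sensitive" resolves to the MySQL
        # dialect instantiated with normalization_strategy="case_sensitive";
        # "true"/"1" and "false"/"0" are coerced to booleans, and a bare key such
        # as "duckdb, my_flag" sets my_flag=True (my_flag is hypothetical).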
 758
 759    @classmethod
 760    def format_time(
 761        cls, expression: t.Optional[str | exp.Expression]
 762    ) -> t.Optional[exp.Expression]:
 763        """Converts a time format in this dialect to its equivalent Python `strftime` format."""
 764        if isinstance(expression, str):
 765            return exp.Literal.string(
 766                # the time formats are quoted
 767                format_time(expression[1:-1], cls.TIME_MAPPING, cls.TIME_TRIE)
 768            )
 769
 770        if expression and expression.is_string:
 771            return exp.Literal.string(format_time(expression.this, cls.TIME_MAPPING, cls.TIME_TRIE))
 772
 773        return expression
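        # Example (illustrative; the mapping is dialect-specific, assumed here
        # from Hive's Java-style format tokens):
        #
        #     >>> from sqlglot.dialects.hive import Hive
        #     >>> Hive.format_time("'yyyy-MM-dd'").sql()
        #     "'%Y-%m-%d'"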
 774
 775    def __init__(self, **kwargs) -> None:
 776        normalization_strategy = kwargs.pop("normalization_strategy", None)
 777
 778        if normalization_strategy is None:
 779            self.normalization_strategy = self.NORMALIZATION_STRATEGY
 780        else:
 781            self.normalization_strategy = NormalizationStrategy(normalization_strategy.upper())
 782
 783        self.settings = kwargs
 784
 785    def __eq__(self, other: t.Any) -> bool:
 786        # Does not currently take dialect state into account
 787        return type(self) == other
 788
 789    def __hash__(self) -> int:
 790        # Does not currently take dialect state into account
 791        return hash(type(self))
 792
 793    def normalize_identifier(self, expression: E) -> E:
 794        """
 795        Transforms an identifier in a way that resembles how it'd be resolved by this dialect.
 796
 797        For example, an identifier like `FoO` would be resolved as `foo` in Postgres, because it
 798        lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so
 799        it would resolve it as `FOO`. If it was quoted, it'd need to be treated as case-sensitive,
 800        and so any normalization would be prohibited in order to avoid "breaking" the identifier.
 801
 802        There are also dialects like Spark, which are case-insensitive even when quotes are
 803        present, and dialects like MySQL, whose resolution rules match those employed by the
 804        underlying operating system, for example they may always be case-sensitive in Linux.
 805
 806        Finally, the normalization behavior of some engines can even be controlled through flags,
 807        like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.
 808
 809        SQLGlot aims to understand and handle all of these different behaviors gracefully, so
 810        that it can analyze queries in the optimizer and successfully capture their semantics.
 811        """
 812        if (
 813            isinstance(expression, exp.Identifier)
 814            and self.normalization_strategy is not NormalizationStrategy.CASE_SENSITIVE
 815            and (
 816                not expression.quoted
 817                or self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE
 818            )
 819        ):
 820            expression.set(
 821                "this",
 822                (
 823                    expression.this.upper()
 824                    if self.normalization_strategy is NormalizationStrategy.UPPERCASE
 825                    else expression.this.lower()
 826                ),
 827            )
 828
 829        return expression
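        # A minimal sketch of the behavior described above (illustrative):
        #
        #     >>> from sqlglot import exp
        #     >>> Dialect.get_or_raise("postgres").normalize_identifier(exp.to_identifier("FoO")).name
        #     'foo'
        #     >>> Dialect.get_or_raise("snowflake").normalize_identifier(exp.to_identifier("FoO")).name
        #     'FOO'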
 830
 831    def case_sensitive(self, text: str) -> bool:
 832        """Checks if text contains any case sensitive characters, based on the dialect's rules."""
 833        if self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE:
 834            return False
 835
 836        unsafe = (
 837            str.islower
 838            if self.normalization_strategy is NormalizationStrategy.UPPERCASE
 839            else str.isupper
 840        )
 841        return any(unsafe(char) for char in text)
 842
 843    def can_identify(self, text: str, identify: str | bool = "safe") -> bool:
 844        """Checks if text can be identified given an identify option.
 845
 846        Args:
 847            text: The text to check.
 848            identify:
 849                `"always"` or `True`: Always returns `True`.
 850                `"safe"`: Only returns `True` if the identifier is case-insensitive.
 851
 852        Returns:
 853            Whether the given text can be identified.
 854        """
 855        if identify is True or identify == "always":
 856            return True
 857
 858        if identify == "safe":
 859            return not self.case_sensitive(text)
 860
 861        return False
 862
 863    def quote_identifier(self, expression: E, identify: bool = True) -> E:
 864        """
 865        Adds quotes to a given identifier.
 866
 867        Args:
 868            expression: The expression of interest. If it's not an `Identifier`, this method is a no-op.
 869            identify: If set to `False`, the quotes will only be added if the identifier is deemed
 870                "unsafe", with respect to its characters and this dialect's normalization strategy.
 871        """
 872        if isinstance(expression, exp.Identifier) and not isinstance(expression.parent, exp.Func):
 873            name = expression.this
 874            expression.set(
 875                "quoted",
 876                identify or self.case_sensitive(name) or not exp.SAFE_IDENTIFIER_RE.match(name),
 877            )
 878
 879        return expression
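        # For instance (illustrative, using the default dialect's double-quoted
        # identifiers):
        #
        #     >>> from sqlglot import exp
        #     >>> Dialect().quote_identifier(exp.to_identifier("FoO")).sql()
        #     '"FoO"'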
 880
 881    def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
 882        if isinstance(path, exp.Literal):
 883            path_text = path.name
 884            if path.is_number:
 885                path_text = f"[{path_text}]"
 886            try:
 887                return parse_json_path(path_text, self)
 888            except ParseError as e:
 889                logger.warning(f"Invalid JSON path syntax. {str(e)}")
 890
 891        return path
 892
 893    def parse(self, sql: str, **opts) -> t.List[t.Optional[exp.Expression]]:
 894        return self.parser(**opts).parse(self.tokenize(sql), sql)
 895
 896    def parse_into(
 897        self, expression_type: exp.IntoType, sql: str, **opts
 898    ) -> t.List[t.Optional[exp.Expression]]:
 899        return self.parser(**opts).parse_into(expression_type, self.tokenize(sql), sql)
 900
 901    def generate(self, expression: exp.Expression, copy: bool = True, **opts) -> str:
 902        return self.generator(**opts).generate(expression, copy=copy)
 903
 904    def transpile(self, sql: str, **opts) -> t.List[str]:
 905        return [
 906            self.generate(expression, copy=False, **opts) if expression else ""
 907            for expression in self.parse(sql)
 908        ]
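        # End-to-end example (illustrative): tokenize, parse and re-generate SQL
        # with a single dialect instance.
        #
        #     >>> Dialect.get_or_raise("duckdb").transpile("SELECT 1")
        #     ['SELECT 1']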
 909
 910    def tokenize(self, sql: str) -> t.List[Token]:
 911        return self.tokenizer.tokenize(sql)
 912
 913    @property
 914    def tokenizer(self) -> Tokenizer:
 915        return self.tokenizer_class(dialect=self)
 916
 917    @property
 918    def jsonpath_tokenizer(self) -> JSONPathTokenizer:
 919        return self.jsonpath_tokenizer_class(dialect=self)
 920
 921    def parser(self, **opts) -> Parser:
 922        return self.parser_class(dialect=self, **opts)
 923
 924    def generator(self, **opts) -> Generator:
 925        return self.generator_class(dialect=self, **opts)
 926
 927
 928DialectType = t.Union[str, Dialect, t.Type[Dialect], None]
 929
 930
 931def rename_func(name: str) -> t.Callable[[Generator, exp.Expression], str]:
 932    return lambda self, expression: self.func(name, *flatten(expression.args.values()))
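    # Typical usage in a dialect definition (a minimal sketch; MyDialect is
    # hypothetical, but Generator.TRANSFORMS is where dialects register such
    # helpers):
    #
    #     class MyDialect(Dialect):
    #         class Generator(Generator):
    #             TRANSFORMS = {
    #                 **Generator.TRANSFORMS,
    #                 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
    #             }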
 933
 934
 935def approx_count_distinct_sql(self: Generator, expression: exp.ApproxDistinct) -> str:
 936    if expression.args.get("accuracy"):
 937        self.unsupported("APPROX_COUNT_DISTINCT does not support accuracy")
 938    return self.func("APPROX_COUNT_DISTINCT", expression.this)
 939
 940
 941def if_sql(
 942    name: str = "IF", false_value: t.Optional[exp.Expression | str] = None
 943) -> t.Callable[[Generator, exp.If], str]:
 944    def _if_sql(self: Generator, expression: exp.If) -> str:
 945        return self.func(
 946            name,
 947            expression.this,
 948            expression.args.get("true"),
 949            expression.args.get("false") or false_value,
 950        )
 951
 952    return _if_sql
 953
 954
 955def arrow_json_extract_sql(self: Generator, expression: JSON_EXTRACT_TYPE) -> str:
 956    this = expression.this
 957    if self.JSON_TYPE_REQUIRED_FOR_EXTRACTION and isinstance(this, exp.Literal) and this.is_string:
 958        this.replace(exp.cast(this, exp.DataType.Type.JSON))
 959
 960    return self.binary(expression, "->" if isinstance(expression, exp.JSONExtract) else "->>")
 961
 962
 963def inline_array_sql(self: Generator, expression: exp.Array) -> str:
 964    return f"[{self.expressions(expression, dynamic=True, new_line=True, skip_first=True, skip_last=True)}]"
 965
 966
 967def inline_array_unless_query(self: Generator, expression: exp.Array) -> str:
 968    elem = seq_get(expression.expressions, 0)
 969    if isinstance(elem, exp.Expression) and elem.find(exp.Query):
 970        return self.func("ARRAY", elem)
 971    return inline_array_sql(self, expression)
 972
 973
 974def no_ilike_sql(self: Generator, expression: exp.ILike) -> str:
 975    return self.like_sql(
 976        exp.Like(
 977            this=exp.Lower(this=expression.this), expression=exp.Lower(this=expression.expression)
 978        )
 979    )
 980
 981
 982def no_paren_current_date_sql(self: Generator, expression: exp.CurrentDate) -> str:
 983    zone = self.sql(expression, "this")
 984    return f"CURRENT_DATE AT TIME ZONE {zone}" if zone else "CURRENT_DATE"
 985
 986
 987def no_recursive_cte_sql(self: Generator, expression: exp.With) -> str:
 988    if expression.args.get("recursive"):
 989        self.unsupported("Recursive CTEs are unsupported")
 990        expression.args["recursive"] = False
 991    return self.with_sql(expression)
 992
 993
 994def no_safe_divide_sql(self: Generator, expression: exp.SafeDivide) -> str:
 995    n = self.sql(expression, "this")
 996    d = self.sql(expression, "expression")
 997    return f"IF(({d}) <> 0, ({n}) / ({d}), NULL)"
 998
 999
1000def no_tablesample_sql(self: Generator, expression: exp.TableSample) -> str:
1001    self.unsupported("TABLESAMPLE unsupported")
1002    return self.sql(expression.this)
1003
1004
1005def no_pivot_sql(self: Generator, expression: exp.Pivot) -> str:
1006    self.unsupported("PIVOT unsupported")
1007    return ""
1008
1009
1010def no_trycast_sql(self: Generator, expression: exp.TryCast) -> str:
1011    return self.cast_sql(expression)
1012
1013
1014def no_comment_column_constraint_sql(
1015    self: Generator, expression: exp.CommentColumnConstraint
1016) -> str:
1017    self.unsupported("CommentColumnConstraint unsupported")
1018    return ""
1019
1020
1021def no_map_from_entries_sql(self: Generator, expression: exp.MapFromEntries) -> str:
1022    self.unsupported("MAP_FROM_ENTRIES unsupported")
1023    return ""
1024
1025
1026def str_position_sql(
1027    self: Generator, expression: exp.StrPosition, generate_instance: bool = False
1028) -> str:
1029    this = self.sql(expression, "this")
1030    substr = self.sql(expression, "substr")
1031    position = self.sql(expression, "position")
1032    instance = expression.args.get("instance") if generate_instance else None
1033    position_offset = ""
1034
1035    if position:
1036        # Normalize third 'pos' argument into 'SUBSTR(..) + offset' across dialects
1037        this = self.func("SUBSTR", this, position)
1038        position_offset = f" + {position} - 1"
1039
1040    return self.func("STRPOS", this, substr, instance) + position_offset
1041
1042
1043def struct_extract_sql(self: Generator, expression: exp.StructExtract) -> str:
1044    return (
1045        f"{self.sql(expression, 'this')}.{self.sql(exp.to_identifier(expression.expression.name))}"
1046    )
1047
1048
1049def var_map_sql(
1050    self: Generator, expression: exp.Map | exp.VarMap, map_func_name: str = "MAP"
1051) -> str:
1052    keys = expression.args["keys"]
1053    values = expression.args["values"]
1054
1055    if not isinstance(keys, exp.Array) or not isinstance(values, exp.Array):
1056        self.unsupported("Cannot convert array columns into map.")
1057        return self.func(map_func_name, keys, values)
1058
1059    args = []
1060    for key, value in zip(keys.expressions, values.expressions):
1061        args.append(self.sql(key))
1062        args.append(self.sql(value))
1063
1064    return self.func(map_func_name, *args)
1065
1066
1067def build_formatted_time(
1068    exp_class: t.Type[E], dialect: str, default: t.Optional[bool | str] = None
1069) -> t.Callable[[t.List], E]:
1070    """Helper used for time expressions.
1071
1072    Args:
1073        exp_class: the expression class to instantiate.
1074        dialect: target sql dialect.
1075        default: the default format; if True, the dialect's TIME_FORMAT is used.
1076
1077    Returns:
1078        A callable that can be used to return the appropriately formatted time expression.
1079    """
1080
1081    def _builder(args: t.List):
1082        return exp_class(
1083            this=seq_get(args, 0),
1084            format=Dialect[dialect].format_time(
1085                seq_get(args, 1)
1086                or (Dialect[dialect].TIME_FORMAT if default is True else default or None)
1087            ),
1088        )
1089
1090    return _builder
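    # Illustrative registration (an assumption modeled on how dialect parsers
    # wire builders into Parser.FUNCTIONS):
    #
    #     class Hive(Dialect):
    #         class Parser(Parser):
    #             FUNCTIONS = {
    #                 **Parser.FUNCTIONS,
    #                 "DATE_FORMAT": build_formatted_time(exp.TimeToStr, "hive"),
    #             }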
1091
1092
1093def time_format(
1094    dialect: DialectType = None,
1095) -> t.Callable[[Generator, exp.UnixToStr | exp.StrToUnix], t.Optional[str]]:
1096    def _time_format(self: Generator, expression: exp.UnixToStr | exp.StrToUnix) -> t.Optional[str]:
1097        """
1098        Returns the time format for a given expression, unless it's equivalent
1099        to the default time format of the dialect of interest.
1100        """
1101        time_format = self.format_time(expression)
1102        return time_format if time_format != Dialect.get_or_raise(dialect).TIME_FORMAT else None
1103
1104    return _time_format
1105
1106
1107def build_date_delta(
1108    exp_class: t.Type[E],
1109    unit_mapping: t.Optional[t.Dict[str, str]] = None,
1110    default_unit: t.Optional[str] = "DAY",
1111) -> t.Callable[[t.List], E]:
1112    def _builder(args: t.List) -> E:
1113        unit_based = len(args) == 3
1114        this = args[2] if unit_based else seq_get(args, 0)
1115        unit = None
1116        if unit_based or default_unit:
1117            unit = args[0] if unit_based else exp.Literal.string(default_unit)
1118            unit = exp.var(unit_mapping.get(unit.name.lower(), unit.name)) if unit_mapping else unit
1119        return exp_class(this=this, expression=seq_get(args, 1), unit=unit)
1120
1121    return _builder
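    # For example (illustrative): build_date_delta(exp.DateAdd) handles both
    # DATE_ADD(ts, delta) and the unit-first, three-argument form
    # DATE_ADD(unit, delta, ts), falling back to the "DAY" unit when none is given.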
1122
1123
1124def build_date_delta_with_interval(
1125    expression_class: t.Type[E],
1126) -> t.Callable[[t.List], t.Optional[E]]:
1127    def _builder(args: t.List) -> t.Optional[E]:
1128        if len(args) < 2:
1129            return None
1130
1131        interval = args[1]
1132
1133        if not isinstance(interval, exp.Interval):
1134            raise ParseError(f"INTERVAL expression expected but got '{interval}'")
1135
1136        expression = interval.this
1137        if expression and expression.is_string:
1138            expression = exp.Literal.number(expression.this)
1139
1140        return expression_class(this=args[0], expression=expression, unit=unit_to_str(interval))
1141
1142    return _builder
1143
1144
1145def date_trunc_to_time(args: t.List) -> exp.DateTrunc | exp.TimestampTrunc:
1146    unit = seq_get(args, 0)
1147    this = seq_get(args, 1)
1148
1149    if isinstance(this, exp.Cast) and this.is_type("date"):
1150        return exp.DateTrunc(unit=unit, this=this)
1151    return exp.TimestampTrunc(this=this, unit=unit)
1152
1153
1154def date_add_interval_sql(
1155    data_type: str, kind: str
1156) -> t.Callable[[Generator, exp.Expression], str]:
1157    def func(self: Generator, expression: exp.Expression) -> str:
1158        this = self.sql(expression, "this")
1159        interval = exp.Interval(this=expression.expression, unit=unit_to_var(expression))
1160        return f"{data_type}_{kind}({this}, {self.sql(interval)})"
1161
1162    return func
1163
1164
1165def timestamptrunc_sql(zone: bool = False) -> t.Callable[[Generator, exp.TimestampTrunc], str]:
1166    def _timestamptrunc_sql(self: Generator, expression: exp.TimestampTrunc) -> str:
1167        args = [unit_to_str(expression), expression.this]
1168        if zone:
1169            args.append(expression.args.get("zone"))
1170        return self.func("DATE_TRUNC", *args)
1171
1172    return _timestamptrunc_sql
1173
1174
1175def no_timestamp_sql(self: Generator, expression: exp.Timestamp) -> str:
1176    zone = expression.args.get("zone")
1177    if not zone:
1178        from sqlglot.optimizer.annotate_types import annotate_types
1179
1180        target_type = annotate_types(expression).type or exp.DataType.Type.TIMESTAMP
1181        return self.sql(exp.cast(expression.this, target_type))
1182    if zone.name.lower() in TIMEZONES:
1183        return self.sql(
1184            exp.AtTimeZone(
1185                this=exp.cast(expression.this, exp.DataType.Type.TIMESTAMP),
1186                zone=zone,
1187            )
1188        )
1189    return self.func("TIMESTAMP", expression.this, zone)
1190
1191
1192def no_time_sql(self: Generator, expression: exp.Time) -> str:
1193    # Transpile BQ's TIME(timestamp, zone) to CAST(TIMESTAMPTZ <timestamp> AT TIME ZONE <zone> AS TIME)
1194    this = exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ)
1195    expr = exp.cast(
1196        exp.AtTimeZone(this=this, zone=expression.args.get("zone")), exp.DataType.Type.TIME
1197    )
1198    return self.sql(expr)
1199
1200
1201def no_datetime_sql(self: Generator, expression: exp.Datetime) -> str:
1202    this = expression.this
1203    expr = expression.expression
1204
1205    if expr.name.lower() in TIMEZONES:
1206        # Transpile BQ's DATETIME(timestamp, zone) to CAST(TIMESTAMPTZ <timestamp> AT TIME ZONE <zone> AS TIMESTAMP)
1207        this = exp.cast(this, exp.DataType.Type.TIMESTAMPTZ)
1208        this = exp.cast(exp.AtTimeZone(this=this, zone=expr), exp.DataType.Type.TIMESTAMP)
1209        return self.sql(this)
1210
1211    this = exp.cast(this, exp.DataType.Type.DATE)
1212    expr = exp.cast(expr, exp.DataType.Type.TIME)
1213
1214    return self.sql(exp.cast(exp.Add(this=this, expression=expr), exp.DataType.Type.TIMESTAMP))
1215
1216
1217def locate_to_strposition(args: t.List) -> exp.Expression:
1218    return exp.StrPosition(
1219        this=seq_get(args, 1), substr=seq_get(args, 0), position=seq_get(args, 2)
1220    )
1221
1222
1223def strposition_to_locate_sql(self: Generator, expression: exp.StrPosition) -> str:
1224    return self.func(
1225        "LOCATE", expression.args.get("substr"), expression.this, expression.args.get("position")
1226    )
1227
1228
1229def left_to_substring_sql(self: Generator, expression: exp.Left) -> str:
1230    return self.sql(
1231        exp.Substring(
1232            this=expression.this, start=exp.Literal.number(1), length=expression.expression
1233        )
1234    )
1235
1236
1237def right_to_substring_sql(self: Generator, expression: exp.Right) -> str:
1238    return self.sql(
1239        exp.Substring(
1240            this=expression.this,
1241            start=exp.Length(this=expression.this) - exp.paren(expression.expression - 1),
1242        )
1243    )
1244
1245
1246def timestrtotime_sql(self: Generator, expression: exp.TimeStrToTime) -> str:
1247    datatype = (
1248        exp.DataType.Type.TIMESTAMPTZ
1249        if expression.args.get("zone")
1250        else exp.DataType.Type.TIMESTAMP
1251    )
1252
1253    return self.sql(exp.cast(expression.this, datatype, dialect=self.dialect))
1254
1255
1256def datestrtodate_sql(self: Generator, expression: exp.DateStrToDate) -> str:
1257    return self.sql(exp.cast(expression.this, exp.DataType.Type.DATE))
1258
1259
1260# Used for Presto and DuckDB, whose functions don't support a charset and assume UTF-8
1261def encode_decode_sql(
1262    self: Generator, expression: exp.Expression, name: str, replace: bool = True
1263) -> str:
1264    charset = expression.args.get("charset")
1265    if charset and charset.name.lower() != "utf-8":
1266        self.unsupported(f"Expected utf-8 character set, got {charset}.")
1267
1268    return self.func(name, expression.this, expression.args.get("replace") if replace else None)
1269
1270
1271def min_or_least(self: Generator, expression: exp.Min) -> str:
1272    name = "LEAST" if expression.expressions else "MIN"
1273    return rename_func(name)(self, expression)
1274
1275
1276def max_or_greatest(self: Generator, expression: exp.Max) -> str:
1277    name = "GREATEST" if expression.expressions else "MAX"
1278    return rename_func(name)(self, expression)
1279
1280
1281def count_if_to_sum(self: Generator, expression: exp.CountIf) -> str:
1282    cond = expression.this
1283
1284    if isinstance(expression.this, exp.Distinct):
1285        cond = expression.this.expressions[0]
1286        self.unsupported("DISTINCT is not supported when converting COUNT_IF to SUM")
1287
1288    return self.func("sum", exp.func("if", cond, 1, 0))
1289
1290
1291def trim_sql(self: Generator, expression: exp.Trim) -> str:
1292    target = self.sql(expression, "this")
1293    trim_type = self.sql(expression, "position")
1294    remove_chars = self.sql(expression, "expression")
1295    collation = self.sql(expression, "collation")
1296
1297    # Use TRIM/LTRIM/RTRIM syntax if the expression isn't database-specific
1298    if not remove_chars:
1299        return self.trim_sql(expression)
1300
1301    trim_type = f"{trim_type} " if trim_type else ""
1302    remove_chars = f"{remove_chars} " if remove_chars else ""
1303    from_part = "FROM " if trim_type or remove_chars else ""
1304    collation = f" COLLATE {collation}" if collation else ""
1305    return f"TRIM({trim_type}{remove_chars}{from_part}{target}{collation})"
1306
1307
1308def str_to_time_sql(self: Generator, expression: exp.Expression) -> str:
1309    return self.func("STRPTIME", expression.this, self.format_time(expression))
1310
1311
1312def concat_to_dpipe_sql(self: Generator, expression: exp.Concat) -> str:
1313    return self.sql(reduce(lambda x, y: exp.DPipe(this=x, expression=y), expression.expressions))
1314
1315
1316def concat_ws_to_dpipe_sql(self: Generator, expression: exp.ConcatWs) -> str:
1317    delim, *rest_args = expression.expressions
1318    return self.sql(
1319        reduce(
1320            lambda x, y: exp.DPipe(this=x, expression=exp.DPipe(this=delim, expression=y)),
1321            rest_args,
1322        )
1323    )
1324
1325
1326def regexp_extract_sql(self: Generator, expression: exp.RegexpExtract) -> str:
1327    bad_args = list(filter(expression.args.get, ("position", "occurrence", "parameters")))
1328    if bad_args:
1329        self.unsupported(f"REGEXP_EXTRACT does not support the following arg(s): {bad_args}")
1330
1331    return self.func(
1332        "REGEXP_EXTRACT", expression.this, expression.expression, expression.args.get("group")
1333    )
1334
1335
1336def regexp_replace_sql(self: Generator, expression: exp.RegexpReplace) -> str:
1337    bad_args = list(filter(expression.args.get, ("position", "occurrence", "modifiers")))
1338    if bad_args:
1339        self.unsupported(f"REGEXP_REPLACE does not support the following arg(s): {bad_args}")
1340
1341    return self.func(
1342        "REGEXP_REPLACE", expression.this, expression.expression, expression.args["replacement"]
1343    )
1344
1345
1346def pivot_column_names(aggregations: t.List[exp.Expression], dialect: DialectType) -> t.List[str]:
1347    names = []
1348    for agg in aggregations:
1349        if isinstance(agg, exp.Alias):
1350            names.append(agg.alias)
1351        else:
1352            """
1353            This case corresponds to aggregations without aliases being used as suffixes
1354            (e.g. col_avg(foo)). We need to unquote identifiers because they're going to
1355            be quoted in the base parser's `_parse_pivot` method, due to `to_identifier`.
1356            Otherwise, we'd end up with `col_avg(`foo`)` (notice the quotes around the identifier).
1357            """
1358            agg_all_unquoted = agg.transform(
1359                lambda node: (
1360                    exp.Identifier(this=node.name, quoted=False)
1361                    if isinstance(node, exp.Identifier)
1362                    else node
1363                )
1364            )
1365            names.append(agg_all_unquoted.sql(dialect=dialect, normalize_functions="lower"))
1366
1367    return names
1368
1369
1370def binary_from_function(expr_type: t.Type[B]) -> t.Callable[[t.List], B]:
1371    return lambda args: expr_type(this=seq_get(args, 0), expression=seq_get(args, 1))
1372
1373
1374# Used to represent DATE_TRUNC in Doris, Postgres and Starrocks dialects
1375def build_timestamp_trunc(args: t.List) -> exp.TimestampTrunc:
1376    return exp.TimestampTrunc(this=seq_get(args, 1), unit=seq_get(args, 0))
1377
1378
1379def any_value_to_max_sql(self: Generator, expression: exp.AnyValue) -> str:
1380    return self.func("MAX", expression.this)
1381
1382
1383def bool_xor_sql(self: Generator, expression: exp.Xor) -> str:
1384    a = self.sql(expression.left)
1385    b = self.sql(expression.right)
1386    return f"({a} AND (NOT {b})) OR ((NOT {a}) AND {b})"
1387
1388
1389def is_parse_json(expression: exp.Expression) -> bool:
1390    return isinstance(expression, exp.ParseJSON) or (
1391        isinstance(expression, exp.Cast) and expression.is_type("json")
1392    )
1393
1394
1395def isnull_to_is_null(args: t.List) -> exp.Expression:
1396    return exp.Paren(this=exp.Is(this=seq_get(args, 0), expression=exp.null()))
1397
1398
1399def generatedasidentitycolumnconstraint_sql(
1400    self: Generator, expression: exp.GeneratedAsIdentityColumnConstraint
1401) -> str:
1402    start = self.sql(expression, "start") or "1"
1403    increment = self.sql(expression, "increment") or "1"
1404    return f"IDENTITY({start}, {increment})"
1405
1406
1407def arg_max_or_min_no_count(name: str) -> t.Callable[[Generator, exp.ArgMax | exp.ArgMin], str]:
1408    def _arg_max_or_min_sql(self: Generator, expression: exp.ArgMax | exp.ArgMin) -> str:
1409        if expression.args.get("count"):
1410            self.unsupported(f"Only two arguments are supported in function {name}.")
1411
1412        return self.func(name, expression.this, expression.expression)
1413
1414    return _arg_max_or_min_sql
1415
1416
1417def ts_or_ds_add_cast(expression: exp.TsOrDsAdd) -> exp.TsOrDsAdd:
1418    this = expression.this.copy()
1419
1420    return_type = expression.return_type
1421    if return_type.is_type(exp.DataType.Type.DATE):
1422        # If we need to cast to a DATE, we cast to TIMESTAMP first to make sure we
1423        # can truncate timestamp strings, because some dialects can't cast them to DATE
1424        this = exp.cast(this, exp.DataType.Type.TIMESTAMP)
1425
1426    expression.this.replace(exp.cast(this, return_type))
1427    return expression
1428
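# Illustrative sketch, not part of sqlglot's source: the intermediate TIMESTAMP
# cast lets timestamp *strings* be truncated to DATE in dialects that cannot
# cast such strings to DATE directly. The expected output is an assumption.
from sqlglot import exp

_node = exp.TsOrDsAdd(
    this=exp.Literal.string("2020-01-01 12:00:00"),
    expression=exp.Literal.number(1),
    unit=exp.var("DAY"),
)  # TsOrDsAdd's return_type defaults to DATE, triggering the TIMESTAMP pre-cast
print(ts_or_ds_add_cast(_node).this.sql())
# expected: CAST(CAST('2020-01-01 12:00:00' AS TIMESTAMP) AS DATE)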
1429
1430def date_delta_sql(name: str, cast: bool = False) -> t.Callable[[Generator, DATE_ADD_OR_DIFF], str]:
1431    def _delta_sql(self: Generator, expression: DATE_ADD_OR_DIFF) -> str:
1432        if cast and isinstance(expression, exp.TsOrDsAdd):
1433            expression = ts_or_ds_add_cast(expression)
1434
1435        return self.func(
1436            name,
1437            unit_to_var(expression),
1438            expression.expression,
1439            expression.this,
1440        )
1441
1442    return _delta_sql
1443
1444
1445def unit_to_str(expression: exp.Expression, default: str = "DAY") -> t.Optional[exp.Expression]:
1446    unit = expression.args.get("unit")
1447
1448    if isinstance(unit, exp.Placeholder):
1449        return unit
1450    if unit:
1451        return exp.Literal.string(unit.name)
1452    return exp.Literal.string(default) if default else None
1453
1454
1455def unit_to_var(expression: exp.Expression, default: str = "DAY") -> t.Optional[exp.Expression]:
1456    unit = expression.args.get("unit")
1457
1458    if isinstance(unit, (exp.Var, exp.Placeholder)):
1459        return unit
1460    return exp.Var(this=default) if default else None
1461
1462
1463@t.overload
1464def map_date_part(part: exp.Expression, dialect: DialectType = Dialect) -> exp.Var:
1465    pass
1466
1467
1468@t.overload
1469def map_date_part(
1470    part: t.Optional[exp.Expression], dialect: DialectType = Dialect
1471) -> t.Optional[exp.Expression]:
1472    pass
1473
1474
1475def map_date_part(part, dialect: DialectType = Dialect):
1476    mapped = (
1477        Dialect.get_or_raise(dialect).DATE_PART_MAPPING.get(part.name.upper()) if part else None
1478    )
1479    return exp.var(mapped) if mapped else part
1480
1481
1482def no_last_day_sql(self: Generator, expression: exp.LastDay) -> str:
1483    trunc_curr_date = exp.func("date_trunc", "month", expression.this)
1484    plus_one_month = exp.func("date_add", trunc_curr_date, 1, "month")
1485    minus_one_day = exp.func("date_sub", plus_one_month, 1, "day")
1486
1487    return self.sql(exp.cast(minus_one_day, exp.DataType.Type.DATE))
1488
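# Illustrative walk-through, not part of sqlglot's source: the LAST_DAY
# emulation above is plain date arithmetic, e.g. for d = 2024-02-10:
#   DATE_TRUNC('month', d)  -> 2024-02-01
#   + 1 month               -> 2024-03-01
#   - 1 day                 -> 2024-02-29  (leap years handled for free)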
1489
1490def merge_without_target_sql(self: Generator, expression: exp.Merge) -> str:
1491    """Remove table refs from columns in when statements."""
1492    alias = expression.this.args.get("alias")
1493
1494    def normalize(identifier: t.Optional[exp.Identifier]) -> t.Optional[str]:
1495        return self.dialect.normalize_identifier(identifier).name if identifier else None
1496
1497    targets = {normalize(expression.this.this)}
1498
1499    if alias:
1500        targets.add(normalize(alias.this))
1501
1502    for when in expression.expressions:
1503        # only remove the target names from the THEN clause
1504        # they're still valid in the <condition> part of WHEN MATCHED / WHEN NOT MATCHED
1505        # ref: https://github.com/TobikoData/sqlmesh/issues/2934
1506        then = when.args.get("then")
1507        if then:
1508            then.transform(
1509                lambda node: (
1510                    exp.column(node.this)
1511                    if isinstance(node, exp.Column) and normalize(node.args.get("table")) in targets
1512                    else node
1513                ),
1514                copy=False,
1515            )
1516
1517    return self.merge_sql(expression)
1518
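# Illustrative sketch, not part of sqlglot's source: the effect on a MERGE is
# roughly the following (the ON condition keeps its qualifiers, THEN loses them):
#   MERGE INTO t USING s ON t.id = s.id WHEN MATCHED THEN UPDATE SET t.x = s.x
#   -> MERGE INTO t USING s ON t.id = s.id WHEN MATCHED THEN UPDATE SET x = s.x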
1519
1520def build_json_extract_path(
1521    expr_type: t.Type[F], zero_based_indexing: bool = True, arrow_req_json_type: bool = False
1522) -> t.Callable[[t.List], F]:
1523    def _builder(args: t.List) -> F:
1524        segments: t.List[exp.JSONPathPart] = [exp.JSONPathRoot()]
1525        for arg in args[1:]:
1526            if not isinstance(arg, exp.Literal):
1527                # We use the fallback parser because we can't really transpile non-literals safely
1528                return expr_type.from_arg_list(args)
1529
1530            text = arg.name
1531            if is_int(text):
1532                index = int(text)
1533                segments.append(
1534                    exp.JSONPathSubscript(this=index if zero_based_indexing else index - 1)
1535                )
1536            else:
1537                segments.append(exp.JSONPathKey(this=text))
1538
1539        # This is done to avoid failing in the expression validator due to the arg count
1540        del args[2:]
1541        return expr_type(
1542            this=seq_get(args, 0),
1543            expression=exp.JSONPath(expressions=segments),
1544            only_json_types=arrow_req_json_type,
1545        )
1546
1547    return _builder
1548
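# Illustrative sketch, not part of sqlglot's source: a builder collapses the
# literal path arguments into a single JSONPath. The expected output is an
# assumption and may vary across sqlglot versions/dialects.
from sqlglot import exp

_build = build_json_extract_path(exp.JSONExtract)
_node = _build([exp.column("doc"), exp.Literal.string("a"), exp.Literal.number(0)])
print(_node.sql())
# expected: roughly JSON_EXTRACT(doc, '$.a[0]') -- root, key 'a', zero-based subscript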
1549
1550def json_extract_segments(
1551    name: str, quoted_index: bool = True, op: t.Optional[str] = None
1552) -> t.Callable[[Generator, JSON_EXTRACT_TYPE], str]:
1553    def _json_extract_segments(self: Generator, expression: JSON_EXTRACT_TYPE) -> str:
1554        path = expression.expression
1555        if not isinstance(path, exp.JSONPath):
1556            return rename_func(name)(self, expression)
1557
1558        segments = []
1559        for segment in path.expressions:
1560            path = self.sql(segment)
1561            if path:
1562                if isinstance(segment, exp.JSONPathPart) and (
1563                    quoted_index or not isinstance(segment, exp.JSONPathSubscript)
1564                ):
1565                    path = f"{self.dialect.QUOTE_START}{path}{self.dialect.QUOTE_END}"
1566
1567                segments.append(path)
1568
1569        if op:
1570            return f" {op} ".join([self.sql(expression.this), *segments])
1571        return self.func(name, expression.this, *segments)
1572
1573    return _json_extract_segments
1574
1575
1576def json_path_key_only_name(self: Generator, expression: exp.JSONPathKey) -> str:
1577    if isinstance(expression.this, exp.JSONPathWildcard):
1578        self.unsupported("Unsupported wildcard in JSONPathKey expression")
1579
1580    return expression.name
1581
1582
1583def filter_array_using_unnest(self: Generator, expression: exp.ArrayFilter) -> str:
1584    cond = expression.expression
1585    if isinstance(cond, exp.Lambda) and len(cond.expressions) == 1:
1586        alias = cond.expressions[0]
1587        cond = cond.this
1588    elif isinstance(cond, exp.Predicate):
1589        alias = "_u"
1590    else:
1591        self.unsupported("Unsupported filter condition")
1592        return ""
1593
1594    unnest = exp.Unnest(expressions=[expression.this])
1595    filtered = exp.select(alias).from_(exp.alias_(unnest, None, table=[alias])).where(cond)
1596    return self.sql(exp.Array(expressions=[filtered]))
1597
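# Illustrative sketch, not part of sqlglot's source: the rewrite above turns a
# lambda-style filter such as
#   FILTER(arr, x -> x > 1)
# into a scalar-subquery form, roughly
#   ARRAY(SELECT x FROM UNNEST(arr) AS _t(x) WHERE x > 1)
# which is how BigQuery-like dialects express array filtering.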
1598
1599def to_number_with_nls_param(self: Generator, expression: exp.ToNumber) -> str:
1600    return self.func(
1601        "TO_NUMBER",
1602        expression.this,
1603        expression.args.get("format"),
1604        expression.args.get("nlsparam"),
1605    )
1606
1607
1608def build_default_decimal_type(
1609    precision: t.Optional[int] = None, scale: t.Optional[int] = None
1610) -> t.Callable[[exp.DataType], exp.DataType]:
1611    def _builder(dtype: exp.DataType) -> exp.DataType:
1612        if dtype.expressions or precision is None:
1613            return dtype
1614
1615        params = f"{precision}{f', {scale}' if scale is not None else ''}"
1616        return exp.DataType.build(f"DECIMAL({params})")
1617
1618    return _builder
1619
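# Illustrative sketch, not part of sqlglot's source: defaults are only applied
# when the type carries no explicit parameters.
from sqlglot import exp

_decimal = build_default_decimal_type(precision=38, scale=9)
print(_decimal(exp.DataType.build("DECIMAL")).sql())         # DECIMAL(38, 9)
print(_decimal(exp.DataType.build("DECIMAL(10, 2)")).sql())  # DECIMAL(10, 2), unchanged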
1620
1621def build_timestamp_from_parts(args: t.List) -> exp.Func:
1622    if len(args) == 2:
1623        # Other dialects don't have the TIMESTAMP_FROM_PARTS(date, time) concept,
1624        # so we parse this into Anonymous for now instead of introducing complexity
1625        return exp.Anonymous(this="TIMESTAMP_FROM_PARTS", expressions=args)
1626
1627    return exp.TimestampFromParts.from_arg_list(args)
1628
1629
1630def sha256_sql(self: Generator, expression: exp.SHA2) -> str:
1631    return self.func(f"SHA{expression.text('length') or '256'}", expression.this)
1632
1633
1634def sequence_sql(self: Generator, expression: exp.GenerateSeries | exp.GenerateDateArray) -> str:
1635    start = expression.args.get("start")
1636    end = expression.args.get("end")
1637    step = expression.args.get("step")
1638
1639    if isinstance(start, exp.Cast):
1640        target_type = start.to
1641    elif isinstance(end, exp.Cast):
1642        target_type = end.to
1643    else:
1644        target_type = None
1645
1646    if start and end and target_type and target_type.is_type("date", "timestamp"):
1647        if isinstance(start, exp.Cast) and target_type is start.to:
1648            end = exp.cast(end, target_type)
1649        else:
1650            start = exp.cast(start, target_type)
1651
1652    return self.func("SEQUENCE", start, end, step)
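# Illustrative note, not part of sqlglot's source: when only one endpoint
# carries a DATE/TIMESTAMP cast, sequence_sql mirrors that cast onto the other
# endpoint, e.g. SEQUENCE(CAST(x AS DATE), y) -> SEQUENCE(CAST(x AS DATE),
# CAST(y AS DATE)), keeping both endpoints the same type for engines such as
# Presto/Trino.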
logger = <Logger sqlglot (WARNING)>
UNESCAPED_SEQUENCES = {'\\a': '\x07', '\\b': '\x08', '\\f': '\x0c', '\\n': '\n', '\\r': '\r', '\\t': '\t', '\\v': '\x0b', '\\\\': '\\'}
class Dialects(builtins.str, enum.Enum):
49class Dialects(str, Enum):
50    """Dialects supported by SQLGlot."""
51
52    DIALECT = ""
53
54    ATHENA = "athena"
55    BIGQUERY = "bigquery"
56    CLICKHOUSE = "clickhouse"
57    DATABRICKS = "databricks"
58    DORIS = "doris"
59    DRILL = "drill"
60    DUCKDB = "duckdb"
61    HIVE = "hive"
62    MATERIALIZE = "materialize"
63    MYSQL = "mysql"
64    ORACLE = "oracle"
65    POSTGRES = "postgres"
66    PRESTO = "presto"
67    PRQL = "prql"
68    REDSHIFT = "redshift"
69    RISINGWAVE = "risingwave"
70    SNOWFLAKE = "snowflake"
71    SPARK = "spark"
72    SPARK2 = "spark2"
73    SQLITE = "sqlite"
74    STARROCKS = "starrocks"
75    TABLEAU = "tableau"
76    TERADATA = "teradata"
77    TRINO = "trino"
78    TSQL = "tsql"

Dialects supported by SQLGlot.

DIALECT = <Dialects.DIALECT: ''>
ATHENA = <Dialects.ATHENA: 'athena'>
BIGQUERY = <Dialects.BIGQUERY: 'bigquery'>
CLICKHOUSE = <Dialects.CLICKHOUSE: 'clickhouse'>
DATABRICKS = <Dialects.DATABRICKS: 'databricks'>
DORIS = <Dialects.DORIS: 'doris'>
DRILL = <Dialects.DRILL: 'drill'>
DUCKDB = <Dialects.DUCKDB: 'duckdb'>
HIVE = <Dialects.HIVE: 'hive'>
MATERIALIZE = <Dialects.MATERIALIZE: 'materialize'>
MYSQL = <Dialects.MYSQL: 'mysql'>
ORACLE = <Dialects.ORACLE: 'oracle'>
POSTGRES = <Dialects.POSTGRES: 'postgres'>
PRESTO = <Dialects.PRESTO: 'presto'>
PRQL = <Dialects.PRQL: 'prql'>
REDSHIFT = <Dialects.REDSHIFT: 'redshift'>
RISINGWAVE = <Dialects.RISINGWAVE: 'risingwave'>
SNOWFLAKE = <Dialects.SNOWFLAKE: 'snowflake'>
SPARK = <Dialects.SPARK: 'spark'>
SPARK2 = <Dialects.SPARK2: 'spark2'>
SQLITE = <Dialects.SQLITE: 'sqlite'>
STARROCKS = <Dialects.STARROCKS: 'starrocks'>
TABLEAU = <Dialects.TABLEAU: 'tableau'>
TERADATA = <Dialects.TERADATA: 'teradata'>
TRINO = <Dialects.TRINO: 'trino'>
TSQL = <Dialects.TSQL: 'tsql'>
class NormalizationStrategy(builtins.str, sqlglot.helper.AutoName):
81class NormalizationStrategy(str, AutoName):
82    """Specifies the strategy according to which identifiers should be normalized."""
83
84    LOWERCASE = auto()
85    """Unquoted identifiers are lowercased."""
86
87    UPPERCASE = auto()
88    """Unquoted identifiers are uppercased."""
89
90    CASE_SENSITIVE = auto()
91    """Always case-sensitive, regardless of quotes."""
92
93    CASE_INSENSITIVE = auto()
94    """Always case-insensitive, regardless of quotes."""

Specifies the strategy according to which identifiers should be normalized.

LOWERCASE = <NormalizationStrategy.LOWERCASE: 'LOWERCASE'>

Unquoted identifiers are lowercased.

UPPERCASE = <NormalizationStrategy.UPPERCASE: 'UPPERCASE'>

Unquoted identifiers are uppercased.

CASE_SENSITIVE = <NormalizationStrategy.CASE_SENSITIVE: 'CASE_SENSITIVE'>

Always case-sensitive, regardless of quotes.

CASE_INSENSITIVE = <NormalizationStrategy.CASE_INSENSITIVE: 'CASE_INSENSITIVE'>

Always case-insensitive, regardless of quotes.

class Dialect:
222class Dialect(metaclass=_Dialect):
223    INDEX_OFFSET = 0
224    """The base index offset for arrays."""
225
226    WEEK_OFFSET = 0
227    """First day of the week in DATE_TRUNC(week). Defaults to 0 (Monday). -1 would be Sunday."""
228
229    UNNEST_COLUMN_ONLY = False
230    """Whether `UNNEST` table aliases are treated as column aliases."""
231
232    ALIAS_POST_TABLESAMPLE = False
233    """Whether the table alias comes after tablesample."""
234
235    TABLESAMPLE_SIZE_IS_PERCENT = False
236    """Whether a size in the table sample clause represents percentage."""
237
238    NORMALIZATION_STRATEGY = NormalizationStrategy.LOWERCASE
239    """Specifies the strategy according to which identifiers should be normalized."""
240
241    IDENTIFIERS_CAN_START_WITH_DIGIT = False
242    """Whether an unquoted identifier can start with a digit."""
243
244    DPIPE_IS_STRING_CONCAT = True
245    """Whether the DPIPE token (`||`) is a string concatenation operator."""
246
247    STRICT_STRING_CONCAT = False
248    """Whether `CONCAT`'s arguments must be strings."""
249
250    SUPPORTS_USER_DEFINED_TYPES = True
251    """Whether user-defined data types are supported."""
252
253    SUPPORTS_SEMI_ANTI_JOIN = True
254    """Whether `SEMI` or `ANTI` joins are supported."""
255
256    SUPPORTS_COLUMN_JOIN_MARKS = False
257    """Whether the old-style outer join (+) syntax is supported."""
258
259    COPY_PARAMS_ARE_CSV = True
260    """Whether COPY statement parameters are separated by commas (True) or whitespace (False)."""
261
262    NORMALIZE_FUNCTIONS: bool | str = "upper"
263    """
264    Determines how function names are going to be normalized.
265    Possible values:
266        "upper" or True: Convert names to uppercase.
267        "lower": Convert names to lowercase.
268        False: Disables function name normalization.
269    """
270
271    LOG_BASE_FIRST: t.Optional[bool] = True
272    """
273    Whether the base comes first in the `LOG` function.
274    Possible values: `True`, `False`, `None` (two arguments are not supported by `LOG`)
275    """
276
277    NULL_ORDERING = "nulls_are_small"
278    """
279    Default `NULL` ordering method to use if not explicitly set.
280    Possible values: `"nulls_are_small"`, `"nulls_are_large"`, `"nulls_are_last"`
281    """
282
283    TYPED_DIVISION = False
284    """
285    Whether the behavior of `a / b` depends on the types of `a` and `b`.
286    False means `a / b` is always float division.
287    True means `a / b` is integer division if both `a` and `b` are integers.
288    """
289
290    SAFE_DIVISION = False
291    """Whether division by zero throws an error (`False`) or returns NULL (`True`)."""
292
293    CONCAT_COALESCE = False
294    """A `NULL` arg in `CONCAT` yields `NULL` by default, but in some dialects it yields an empty string."""
295
296    HEX_LOWERCASE = False
297    """Whether the `HEX` function returns a lowercase hexadecimal string."""
298
299    DATE_FORMAT = "'%Y-%m-%d'"
300    DATEINT_FORMAT = "'%Y%m%d'"
301    TIME_FORMAT = "'%Y-%m-%d %H:%M:%S'"
302
303    TIME_MAPPING: t.Dict[str, str] = {}
304    """Associates this dialect's time formats with their equivalent Python `strftime` formats."""
305
306    # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_model_rules_date_time
307    # https://docs.teradata.com/r/Teradata-Database-SQL-Functions-Operators-Expressions-and-Predicates/March-2017/Data-Type-Conversions/Character-to-DATE-Conversion/Forcing-a-FORMAT-on-CAST-for-Converting-Character-to-DATE
308    FORMAT_MAPPING: t.Dict[str, str] = {}
309    """
310    Helper which is used for parsing the special syntax `CAST(x AS DATE FORMAT 'yyyy')`.
311    If empty, the corresponding trie will be constructed off of `TIME_MAPPING`.
312    """
313
314    UNESCAPED_SEQUENCES: t.Dict[str, str] = {}
315    """Mapping of an escaped sequence (`\\n`) to its unescaped version (`\n`)."""
316
317    PSEUDOCOLUMNS: t.Set[str] = set()
318    """
319    Columns that are auto-generated by the engine corresponding to this dialect.
320    For example, such columns may be excluded from `SELECT *` queries.
321    """
322
323    PREFER_CTE_ALIAS_COLUMN = False
324    """
325    Some dialects, such as Snowflake, allow you to reference a CTE column alias in the
326    HAVING clause of the CTE. This flag will cause the CTE alias columns to override
327    any projection aliases in the subquery.
328
329    For example,
330        WITH y(c) AS (
331            SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0
332        ) SELECT c FROM y;
333
334        will be rewritten as
335
336        WITH y(c) AS (
337            SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
338        ) SELECT c FROM y;
339    """
340
341    COPY_PARAMS_ARE_CSV = True
342    """
343    Whether COPY statement parameters are separated by comma or whitespace.
344    """
345
346    FORCE_EARLY_ALIAS_REF_EXPANSION = False
347    """
348    Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).
349
350    For example:
351        WITH data AS (
352        SELECT
353            1 AS id,
354            2 AS my_id
355        )
356        SELECT
357            id AS my_id
358        FROM
359            data
360        WHERE
361            my_id = 1
362        GROUP BY
363            my_id
364        HAVING
365            my_id = 1
366
367    In most dialects "my_id" would refer to "data.my_id" (which is done in _qualify_columns()) across the query, except:
368        - BigQuery, which will forward the alias to the GROUP BY and HAVING clauses, i.e. it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1"
369        - Clickhouse, which will forward the alias across the query, i.e. it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
370    """
371
372    EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY = False
373    """Whether alias reference expansion before qualification should only happen for the GROUP BY clause."""
374
375    SUPPORTS_ORDER_BY_ALL = False
376    """
377    Whether ORDER BY ALL is supported (expands to all the selected columns) as in DuckDB, Spark3/Databricks
378    """
379
380    HAS_DISTINCT_ARRAY_CONSTRUCTORS = False
381    """
382    Whether the ARRAY constructor is context-sensitive, i.e. in Redshift ARRAY[1, 2, 3] != ARRAY(1, 2, 3),
383    as the former is of type INT[] while the latter is SUPER
384    """
385
386    SUPPORTS_FIXED_SIZE_ARRAYS = False
387    """
388    Whether expressions such as x::INT[5] should be parsed as fixed-size array defs/casts, e.g. in DuckDB. In
389    dialects that don't support fixed-size arrays, such as Snowflake, this should be interpreted as a subscript/index operator
390    """
391
392    CREATABLE_KIND_MAPPING: dict[str, str] = {}
393    """
394    Helper for dialects that use a different name for the same creatable kind. For example, the Clickhouse
395    equivalent of CREATE SCHEMA is CREATE DATABASE.
396    """
397
398    # --- Autofilled ---
399
400    tokenizer_class = Tokenizer
401    jsonpath_tokenizer_class = JSONPathTokenizer
402    parser_class = Parser
403    generator_class = Generator
404
405    # A trie of the time_mapping keys
406    TIME_TRIE: t.Dict = {}
407    FORMAT_TRIE: t.Dict = {}
408
409    INVERSE_TIME_MAPPING: t.Dict[str, str] = {}
410    INVERSE_TIME_TRIE: t.Dict = {}
411    INVERSE_FORMAT_MAPPING: t.Dict[str, str] = {}
412    INVERSE_FORMAT_TRIE: t.Dict = {}
413
414    INVERSE_CREATABLE_KIND_MAPPING: dict[str, str] = {}
415
416    ESCAPED_SEQUENCES: t.Dict[str, str] = {}
417
418    # Delimiters for string literals and identifiers
419    QUOTE_START = "'"
420    QUOTE_END = "'"
421    IDENTIFIER_START = '"'
422    IDENTIFIER_END = '"'
423
424    # Delimiters for bit, hex, byte and unicode literals
425    BIT_START: t.Optional[str] = None
426    BIT_END: t.Optional[str] = None
427    HEX_START: t.Optional[str] = None
428    HEX_END: t.Optional[str] = None
429    BYTE_START: t.Optional[str] = None
430    BYTE_END: t.Optional[str] = None
431    UNICODE_START: t.Optional[str] = None
432    UNICODE_END: t.Optional[str] = None
433
434    DATE_PART_MAPPING = {
435        "Y": "YEAR",
436        "YY": "YEAR",
437        "YYY": "YEAR",
438        "YYYY": "YEAR",
439        "YR": "YEAR",
440        "YEARS": "YEAR",
441        "YRS": "YEAR",
442        "MM": "MONTH",
443        "MON": "MONTH",
444        "MONS": "MONTH",
445        "MONTHS": "MONTH",
446        "D": "DAY",
447        "DD": "DAY",
448        "DAYS": "DAY",
449        "DAYOFMONTH": "DAY",
450        "DAY OF WEEK": "DAYOFWEEK",
451        "WEEKDAY": "DAYOFWEEK",
452        "DOW": "DAYOFWEEK",
453        "DW": "DAYOFWEEK",
454        "WEEKDAY_ISO": "DAYOFWEEKISO",
455        "DOW_ISO": "DAYOFWEEKISO",
456        "DW_ISO": "DAYOFWEEKISO",
457        "DAY OF YEAR": "DAYOFYEAR",
458        "DOY": "DAYOFYEAR",
459        "DY": "DAYOFYEAR",
460        "W": "WEEK",
461        "WK": "WEEK",
462        "WEEKOFYEAR": "WEEK",
463        "WOY": "WEEK",
464        "WY": "WEEK",
465        "WEEK_ISO": "WEEKISO",
466        "WEEKOFYEARISO": "WEEKISO",
467        "WEEKOFYEAR_ISO": "WEEKISO",
468        "Q": "QUARTER",
469        "QTR": "QUARTER",
470        "QTRS": "QUARTER",
471        "QUARTERS": "QUARTER",
472        "H": "HOUR",
473        "HH": "HOUR",
474        "HR": "HOUR",
475        "HOURS": "HOUR",
476        "HRS": "HOUR",
477        "M": "MINUTE",
478        "MI": "MINUTE",
479        "MIN": "MINUTE",
480        "MINUTES": "MINUTE",
481        "MINS": "MINUTE",
482        "S": "SECOND",
483        "SEC": "SECOND",
484        "SECONDS": "SECOND",
485        "SECS": "SECOND",
486        "MS": "MILLISECOND",
487        "MSEC": "MILLISECOND",
488        "MSECS": "MILLISECOND",
489        "MSECOND": "MILLISECOND",
490        "MSECONDS": "MILLISECOND",
491        "MILLISEC": "MILLISECOND",
492        "MILLISECS": "MILLISECOND",
493        "MILLISECON": "MILLISECOND",
494        "MILLISECONDS": "MILLISECOND",
495        "US": "MICROSECOND",
496        "USEC": "MICROSECOND",
497        "USECS": "MICROSECOND",
498        "MICROSEC": "MICROSECOND",
499        "MICROSECS": "MICROSECOND",
500        "USECOND": "MICROSECOND",
501        "USECONDS": "MICROSECOND",
502        "MICROSECONDS": "MICROSECOND",
503        "NS": "NANOSECOND",
504        "NSEC": "NANOSECOND",
505        "NANOSEC": "NANOSECOND",
506        "NSECOND": "NANOSECOND",
507        "NSECONDS": "NANOSECOND",
508        "NANOSECS": "NANOSECOND",
509        "EPOCH_SECOND": "EPOCH",
510        "EPOCH_SECONDS": "EPOCH",
511        "EPOCH_MILLISECONDS": "EPOCH_MILLISECOND",
512        "EPOCH_MICROSECONDS": "EPOCH_MICROSECOND",
513        "EPOCH_NANOSECONDS": "EPOCH_NANOSECOND",
514        "TZH": "TIMEZONE_HOUR",
515        "TZM": "TIMEZONE_MINUTE",
516        "DEC": "DECADE",
517        "DECS": "DECADE",
518        "DECADES": "DECADE",
519        "MIL": "MILLENIUM",
520        "MILS": "MILLENIUM",
521        "MILLENIA": "MILLENIUM",
522        "C": "CENTURY",
523        "CENT": "CENTURY",
524        "CENTS": "CENTURY",
525        "CENTURIES": "CENTURY",
526    }
527
528    TYPE_TO_EXPRESSIONS: t.Dict[exp.DataType.Type, t.Set[t.Type[exp.Expression]]] = {
529        exp.DataType.Type.BIGINT: {
530            exp.ApproxDistinct,
531            exp.ArraySize,
532            exp.Count,
533            exp.Length,
534        },
535        exp.DataType.Type.BOOLEAN: {
536            exp.Between,
537            exp.Boolean,
538            exp.In,
539            exp.RegexpLike,
540        },
541        exp.DataType.Type.DATE: {
542            exp.CurrentDate,
543            exp.Date,
544            exp.DateFromParts,
545            exp.DateStrToDate,
546            exp.DiToDate,
547            exp.StrToDate,
548            exp.TimeStrToDate,
549            exp.TsOrDsToDate,
550        },
551        exp.DataType.Type.DATETIME: {
552            exp.CurrentDatetime,
553            exp.Datetime,
554            exp.DatetimeAdd,
555            exp.DatetimeSub,
556        },
557        exp.DataType.Type.DOUBLE: {
558            exp.ApproxQuantile,
559            exp.Avg,
560            exp.Div,
561            exp.Exp,
562            exp.Ln,
563            exp.Log,
564            exp.Pow,
565            exp.Quantile,
566            exp.Round,
567            exp.SafeDivide,
568            exp.Sqrt,
569            exp.Stddev,
570            exp.StddevPop,
571            exp.StddevSamp,
572            exp.Variance,
573            exp.VariancePop,
574        },
575        exp.DataType.Type.INT: {
576            exp.Ceil,
577            exp.DatetimeDiff,
578            exp.DateDiff,
579            exp.TimestampDiff,
580            exp.TimeDiff,
581            exp.DateToDi,
582            exp.Levenshtein,
583            exp.Sign,
584            exp.StrPosition,
585            exp.TsOrDiToDi,
586        },
587        exp.DataType.Type.JSON: {
588            exp.ParseJSON,
589        },
590        exp.DataType.Type.TIME: {
591            exp.Time,
592        },
593        exp.DataType.Type.TIMESTAMP: {
594            exp.CurrentTime,
595            exp.CurrentTimestamp,
596            exp.StrToTime,
597            exp.TimeAdd,
598            exp.TimeStrToTime,
599            exp.TimeSub,
600            exp.TimestampAdd,
601            exp.TimestampSub,
602            exp.UnixToTime,
603        },
604        exp.DataType.Type.TINYINT: {
605            exp.Day,
606            exp.Month,
607            exp.Week,
608            exp.Year,
609            exp.Quarter,
610        },
611        exp.DataType.Type.VARCHAR: {
612            exp.ArrayConcat,
613            exp.Concat,
614            exp.ConcatWs,
615            exp.DateToDateStr,
616            exp.GroupConcat,
617            exp.Initcap,
618            exp.Lower,
619            exp.Substring,
620            exp.TimeToStr,
621            exp.TimeToTimeStr,
622            exp.Trim,
623            exp.TsOrDsToDateStr,
624            exp.UnixToStr,
625            exp.UnixToTimeStr,
626            exp.Upper,
627        },
628    }
629
630    ANNOTATORS: AnnotatorsType = {
631        **{
632            expr_type: lambda self, e: self._annotate_unary(e)
633            for expr_type in subclasses(exp.__name__, (exp.Unary, exp.Alias))
634        },
635        **{
636            expr_type: lambda self, e: self._annotate_binary(e)
637            for expr_type in subclasses(exp.__name__, exp.Binary)
638        },
639        **{
640            expr_type: _annotate_with_type_lambda(data_type)
641            for data_type, expressions in TYPE_TO_EXPRESSIONS.items()
642            for expr_type in expressions
643        },
644        exp.Abs: lambda self, e: self._annotate_by_args(e, "this"),
645        exp.Anonymous: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.UNKNOWN),
646        exp.Array: lambda self, e: self._annotate_by_args(e, "expressions", array=True),
647        exp.ArrayAgg: lambda self, e: self._annotate_by_args(e, "this", array=True),
648        exp.ArrayConcat: lambda self, e: self._annotate_by_args(e, "this", "expressions"),
649        exp.Bracket: lambda self, e: self._annotate_bracket(e),
650        exp.Cast: lambda self, e: self._annotate_with_type(e, e.args["to"]),
651        exp.Case: lambda self, e: self._annotate_by_args(e, "default", "ifs"),
652        exp.Coalesce: lambda self, e: self._annotate_by_args(e, "this", "expressions"),
653        exp.DataType: lambda self, e: self._annotate_with_type(e, e.copy()),
654        exp.DateAdd: lambda self, e: self._annotate_timeunit(e),
655        exp.DateSub: lambda self, e: self._annotate_timeunit(e),
656        exp.DateTrunc: lambda self, e: self._annotate_timeunit(e),
657        exp.Distinct: lambda self, e: self._annotate_by_args(e, "expressions"),
658        exp.Div: lambda self, e: self._annotate_div(e),
659        exp.Dot: lambda self, e: self._annotate_dot(e),
660        exp.Explode: lambda self, e: self._annotate_explode(e),
661        exp.Extract: lambda self, e: self._annotate_extract(e),
662        exp.Filter: lambda self, e: self._annotate_by_args(e, "this"),
663        exp.GenerateDateArray: lambda self, e: self._annotate_with_type(
664            e, exp.DataType.build("ARRAY<DATE>")
665        ),
666        exp.GenerateTimestampArray: lambda self, e: self._annotate_with_type(
667            e, exp.DataType.build("ARRAY<TIMESTAMP>")
668        ),
669        exp.If: lambda self, e: self._annotate_by_args(e, "true", "false"),
670        exp.Interval: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.INTERVAL),
671        exp.Least: lambda self, e: self._annotate_by_args(e, "expressions"),
672        exp.Literal: lambda self, e: self._annotate_literal(e),
673        exp.Map: lambda self, e: self._annotate_map(e),
674        exp.Max: lambda self, e: self._annotate_by_args(e, "this", "expressions"),
675        exp.Min: lambda self, e: self._annotate_by_args(e, "this", "expressions"),
676        exp.Null: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.NULL),
677        exp.Nullif: lambda self, e: self._annotate_by_args(e, "this", "expression"),
678        exp.PropertyEQ: lambda self, e: self._annotate_by_args(e, "expression"),
679        exp.Slice: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.UNKNOWN),
680        exp.Struct: lambda self, e: self._annotate_struct(e),
681        exp.Sum: lambda self, e: self._annotate_by_args(e, "this", "expressions", promote=True),
682        exp.Timestamp: lambda self, e: self._annotate_with_type(
683            e,
684            exp.DataType.Type.TIMESTAMPTZ if e.args.get("with_tz") else exp.DataType.Type.TIMESTAMP,
685        ),
686        exp.ToMap: lambda self, e: self._annotate_to_map(e),
687        exp.TryCast: lambda self, e: self._annotate_with_type(e, e.args["to"]),
688        exp.Unnest: lambda self, e: self._annotate_unnest(e),
689        exp.VarMap: lambda self, e: self._annotate_map(e),
690    }
691
692    @classmethod
693    def get_or_raise(cls, dialect: DialectType) -> Dialect:
694        """
695        Look up a dialect in the global dialect registry and return it if it exists.
696
697        Args:
698            dialect: The target dialect. If this is a string, it can be optionally followed by
699                additional key-value pairs that are separated by commas and are used to specify
700                dialect settings, such as whether the dialect's identifiers are case-sensitive.
701
702        Example:
703            >>> dialect = Dialect.get_or_raise("duckdb")
704            >>> dialect = Dialect.get_or_raise("mysql, normalization_strategy = case_sensitive")
705
706        Returns:
707            The corresponding Dialect instance.
708        """
709
710        if not dialect:
711            return cls()
712        if isinstance(dialect, _Dialect):
713            return dialect()
714        if isinstance(dialect, Dialect):
715            return dialect
716        if isinstance(dialect, str):
717            try:
718                dialect_name, *kv_strings = dialect.split(",")
719                kv_pairs = (kv.split("=") for kv in kv_strings)
720                kwargs = {}
721                for pair in kv_pairs:
722                    key = pair[0].strip()
723                    value: t.Union[bool, str, None] = None
724
725                    if len(pair) == 1:
726                        # Default initialize standalone settings to True
727                        value = True
728                    elif len(pair) == 2:
729                        value = pair[1].strip()
730
731                        # Coerce the value to boolean if it matches to the truthy/falsy values below
732                        value_lower = value.lower()
733                        if value_lower in ("true", "1"):
734                            value = True
735                        elif value_lower in ("false", "0"):
736                            value = False
737
738                    kwargs[key] = value
739
740            except ValueError:
741                raise ValueError(
742                    f"Invalid dialect format: '{dialect}'. "
743                    "Please use the correct format: 'dialect [, k1 = v2 [, ...]]'."
744                )
745
746            result = cls.get(dialect_name.strip())
747            if not result:
748                from difflib import get_close_matches
749
750                similar = seq_get(get_close_matches(dialect_name, cls.classes, n=1), 0) or ""
751                if similar:
752                    similar = f" Did you mean {similar}?"
753
754                raise ValueError(f"Unknown dialect '{dialect_name}'.{similar}")
755
756            return result(**kwargs)
757
758        raise ValueError(f"Invalid dialect type for '{dialect}': '{type(dialect)}'.")
759
760    @classmethod
761    def format_time(
762        cls, expression: t.Optional[str | exp.Expression]
763    ) -> t.Optional[exp.Expression]:
764        """Converts a time format in this dialect to its equivalent Python `strftime` format."""
765        if isinstance(expression, str):
766            return exp.Literal.string(
767                # the time formats are quoted
768                format_time(expression[1:-1], cls.TIME_MAPPING, cls.TIME_TRIE)
769            )
770
771        if expression and expression.is_string:
772            return exp.Literal.string(format_time(expression.this, cls.TIME_MAPPING, cls.TIME_TRIE))
773
774        return expression
775
776    def __init__(self, **kwargs) -> None:
777        normalization_strategy = kwargs.pop("normalization_strategy", None)
778
779        if normalization_strategy is None:
780            self.normalization_strategy = self.NORMALIZATION_STRATEGY
781        else:
782            self.normalization_strategy = NormalizationStrategy(normalization_strategy.upper())
783
784        self.settings = kwargs
785
786    def __eq__(self, other: t.Any) -> bool:
787        # Does not currently take dialect state into account
788        return type(self) == other
789
790    def __hash__(self) -> int:
791        # Does not currently take dialect state into account
792        return hash(type(self))
793
794    def normalize_identifier(self, expression: E) -> E:
795        """
796        Transforms an identifier in a way that resembles how it'd be resolved by this dialect.
797
798        For example, an identifier like `FoO` would be resolved as `foo` in Postgres, because it
799        lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so
800        it would resolve it as `FOO`. If it was quoted, it'd need to be treated as case-sensitive,
801        and so any normalization would be prohibited in order to avoid "breaking" the identifier.
802
803        There are also dialects like Spark, which are case-insensitive even when quotes are
804        present, and dialects like MySQL, whose resolution rules match those employed by the
805        underlying operating system, for example they may always be case-sensitive in Linux.
806
807        Finally, the normalization behavior of some engines can even be controlled through flags,
808        like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.
809
810        SQLGlot aims to understand and handle all of these different behaviors gracefully, so
811        that it can analyze queries in the optimizer and successfully capture their semantics.
812        """
813        if (
814            isinstance(expression, exp.Identifier)
815            and self.normalization_strategy is not NormalizationStrategy.CASE_SENSITIVE
816            and (
817                not expression.quoted
818                or self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE
819            )
820        ):
821            expression.set(
822                "this",
823                (
824                    expression.this.upper()
825                    if self.normalization_strategy is NormalizationStrategy.UPPERCASE
826                    else expression.this.lower()
827                ),
828            )
829
830        return expression
831
832    def case_sensitive(self, text: str) -> bool:
833        """Checks if text contains any case sensitive characters, based on the dialect's rules."""
834        if self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE:
835            return False
836
837        unsafe = (
838            str.islower
839            if self.normalization_strategy is NormalizationStrategy.UPPERCASE
840            else str.isupper
841        )
842        return any(unsafe(char) for char in text)
843
844    def can_identify(self, text: str, identify: str | bool = "safe") -> bool:
845        """Checks if text can be identified given an identify option.
846
847        Args:
848            text: The text to check.
849            identify:
850                `"always"` or `True`: Always returns `True`.
851                `"safe"`: Only returns `True` if the identifier is case-insensitive.
852
853        Returns:
854            Whether the given text can be identified.
855        """
856        if identify is True or identify == "always":
857            return True
858
859        if identify == "safe":
860            return not self.case_sensitive(text)
861
862        return False
863
864    def quote_identifier(self, expression: E, identify: bool = True) -> E:
865        """
866        Adds quotes to a given identifier.
867
868        Args:
869            expression: The expression of interest. If it's not an `Identifier`, this method is a no-op.
870            identify: If set to `False`, the quotes will only be added if the identifier is deemed
871                "unsafe", with respect to its characters and this dialect's normalization strategy.
872        """
873        if isinstance(expression, exp.Identifier) and not isinstance(expression.parent, exp.Func):
874            name = expression.this
875            expression.set(
876                "quoted",
877                identify or self.case_sensitive(name) or not exp.SAFE_IDENTIFIER_RE.match(name),
878            )
879
880        return expression
881
882    def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
883        if isinstance(path, exp.Literal):
884            path_text = path.name
885            if path.is_number:
886                path_text = f"[{path_text}]"
887            try:
888                return parse_json_path(path_text, self)
889            except ParseError as e:
890                logger.warning(f"Invalid JSON path syntax. {str(e)}")
891
892        return path
893
894    def parse(self, sql: str, **opts) -> t.List[t.Optional[exp.Expression]]:
895        return self.parser(**opts).parse(self.tokenize(sql), sql)
896
897    def parse_into(
898        self, expression_type: exp.IntoType, sql: str, **opts
899    ) -> t.List[t.Optional[exp.Expression]]:
900        return self.parser(**opts).parse_into(expression_type, self.tokenize(sql), sql)
901
902    def generate(self, expression: exp.Expression, copy: bool = True, **opts) -> str:
903        return self.generator(**opts).generate(expression, copy=copy)
904
905    def transpile(self, sql: str, **opts) -> t.List[str]:
906        return [
907            self.generate(expression, copy=False, **opts) if expression else ""
908            for expression in self.parse(sql)
909        ]
910
911    def tokenize(self, sql: str) -> t.List[Token]:
912        return self.tokenizer.tokenize(sql)
913
914    @property
915    def tokenizer(self) -> Tokenizer:
916        return self.tokenizer_class(dialect=self)
917
918    @property
919    def jsonpath_tokenizer(self) -> JSONPathTokenizer:
920        return self.jsonpath_tokenizer_class(dialect=self)
921
922    def parser(self, **opts) -> Parser:
923        return self.parser_class(dialect=self, **opts)
924
925    def generator(self, **opts) -> Generator:
926        return self.generator_class(dialect=self, **opts)
Dialect(**kwargs)
776    def __init__(self, **kwargs) -> None:
777        normalization_strategy = kwargs.pop("normalization_strategy", None)
778
779        if normalization_strategy is None:
780            self.normalization_strategy = self.NORMALIZATION_STRATEGY
781        else:
782            self.normalization_strategy = NormalizationStrategy(normalization_strategy.upper())
783
784        self.settings = kwargs
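A minimal usage sketch (illustrative; assumes a standard sqlglot install):

    from sqlglot.dialects.dialect import Dialect

    dialect = Dialect.get_or_raise("mysql, normalization_strategy = case_sensitive")
    dialect.normalization_strategy  # NormalizationStrategy.CASE_SENSITIVE
    dialect.settings                # {} (normalization_strategy was popped into its own attribute)
    dialect.parse("SELECT 1")[0].sql(dialect=dialect)  # 'SELECT 1'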
INDEX_OFFSET = 0

The base index offset for arrays.

WEEK_OFFSET = 0

First day of the week in DATE_TRUNC(week). Defaults to 0 (Monday). -1 would be Sunday.

UNNEST_COLUMN_ONLY = False

Whether UNNEST table aliases are treated as column aliases.

ALIAS_POST_TABLESAMPLE = False

Whether the table alias comes after tablesample.

TABLESAMPLE_SIZE_IS_PERCENT = False

Whether a size in the table sample clause represents percentage.

NORMALIZATION_STRATEGY = <NormalizationStrategy.LOWERCASE: 'LOWERCASE'>

Specifies the strategy according to which identifiers should be normalized.

IDENTIFIERS_CAN_START_WITH_DIGIT = False

Whether an unquoted identifier can start with a digit.

DPIPE_IS_STRING_CONCAT = True

Whether the DPIPE token (||) is a string concatenation operator.

STRICT_STRING_CONCAT = False

Whether CONCAT's arguments must be strings.

SUPPORTS_USER_DEFINED_TYPES = True

Whether user-defined data types are supported.

SUPPORTS_SEMI_ANTI_JOIN = True

Whether SEMI or ANTI joins are supported.

SUPPORTS_COLUMN_JOIN_MARKS = False

Whether the old-style outer join (+) syntax is supported.

COPY_PARAMS_ARE_CSV = True

Whether COPY statement parameters are separated by comma or whitespace.

NORMALIZE_FUNCTIONS: bool | str = 'upper'

Determines how function names are going to be normalized.

Possible values:

"upper" or True: Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.

LOG_BASE_FIRST: Optional[bool] = True

Whether the base comes first in the LOG function. Possible values: True, False, None (two arguments are not supported by LOG)

NULL_ORDERING = 'nulls_are_small'

Default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last"

TYPED_DIVISION = False

Whether the behavior of a / b depends on the types of a and b. False means a / b is always float division. True means a / b is integer division if both a and b are integers.

SAFE_DIVISION = False

Whether division by zero throws an error (False) or returns NULL (True).

CONCAT_COALESCE = False

A NULL arg in CONCAT yields NULL by default, but in some dialects it yields an empty string.

HEX_LOWERCASE = False

Whether the HEX function returns a lowercase hexadecimal string.

DATE_FORMAT = "'%Y-%m-%d'"
DATEINT_FORMAT = "'%Y%m%d'"
TIME_FORMAT = "'%Y-%m-%d %H:%M:%S'"
TIME_MAPPING: Dict[str, str] = {}

Associates this dialect's time formats with their equivalent Python strftime formats.

FORMAT_MAPPING: Dict[str, str] = {}

Helper which is used for parsing the special syntax CAST(x AS DATE FORMAT 'yyyy'). If empty, the corresponding trie will be constructed off of TIME_MAPPING.

UNESCAPED_SEQUENCES: Dict[str, str] = {}

Mapping of an escaped sequence (\n) to its unescaped version (a literal newline).

PSEUDOCOLUMNS: Set[str] = set()

Columns that are auto-generated by the engine corresponding to this dialect. For example, such columns may be excluded from SELECT * queries.

PREFER_CTE_ALIAS_COLUMN = False

Some dialects, such as Snowflake, allow you to reference a CTE column alias in the HAVING clause of the CTE. This flag will cause the CTE alias columns to override any projection aliases in the subquery.

For example,

    WITH y(c) AS (
        SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0
    ) SELECT c FROM y;

will be rewritten as

    WITH y(c) AS (
        SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
    ) SELECT c FROM y;

FORCE_EARLY_ALIAS_REF_EXPANSION = False

Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).

For example:

    WITH data AS (
        SELECT
            1 AS id,
            2 AS my_id
    )
    SELECT
        id AS my_id
    FROM data
    WHERE my_id = 1
    GROUP BY my_id
    HAVING my_id = 1

In most dialects "my_id" would refer to "data.my_id" (which is done in _qualify_columns()) across the query, except:

- BigQuery, which will forward the alias to the GROUP BY and HAVING clauses, i.e. it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1"
- Clickhouse, which will forward the alias across the query, i.e. it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"

EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY = False

Whether alias reference expansion before qualification should only happen for the GROUP BY clause.

SUPPORTS_ORDER_BY_ALL = False

Whether ORDER BY ALL is supported (expands to all the selected columns) as in DuckDB, Spark3/Databricks

HAS_DISTINCT_ARRAY_CONSTRUCTORS = False

Whether the ARRAY constructor is context-sensitive, i.e. in Redshift ARRAY[1, 2, 3] != ARRAY(1, 2, 3), as the former is of type INT[] while the latter is SUPER.

SUPPORTS_FIXED_SIZE_ARRAYS = False

Whether expressions such as x::INT[5] should be parsed as fixed-size array defs/casts, e.g. in DuckDB. In dialects that don't support fixed-size arrays, such as Snowflake, this should be interpreted as a subscript/index operator.

CREATABLE_KIND_MAPPING: dict[str, str] = {}

Helper for dialects that use a different name for the same creatable kind. For example, the Clickhouse equivalent of CREATE SCHEMA is CREATE DATABASE.

tokenizer_class = <class 'sqlglot.tokens.Tokenizer'>
jsonpath_tokenizer_class = <class 'sqlglot.tokens.JSONPathTokenizer'>
parser_class = <class 'sqlglot.parser.Parser'>
generator_class = <class 'sqlglot.generator.Generator'>
TIME_TRIE: Dict = {}
FORMAT_TRIE: Dict = {}
INVERSE_TIME_MAPPING: Dict[str, str] = {}
INVERSE_TIME_TRIE: Dict = {}
INVERSE_FORMAT_MAPPING: Dict[str, str] = {}
INVERSE_FORMAT_TRIE: Dict = {}
INVERSE_CREATABLE_KIND_MAPPING: dict[str, str] = {}
ESCAPED_SEQUENCES: Dict[str, str] = {}
QUOTE_START = "'"
QUOTE_END = "'"
IDENTIFIER_START = '"'
IDENTIFIER_END = '"'
BIT_START: Optional[str] = None
BIT_END: Optional[str] = None
HEX_START: Optional[str] = None
HEX_END: Optional[str] = None
BYTE_START: Optional[str] = None
BYTE_END: Optional[str] = None
UNICODE_START: Optional[str] = None
UNICODE_END: Optional[str] = None
DATE_PART_MAPPING = {'Y': 'YEAR', 'YY': 'YEAR', ...}  (full mapping shown in the source listing above)
TYPE_TO_EXPRESSIONS: Dict[sqlglot.expressions.DataType.Type, Set[Type[sqlglot.expressions.Expression]]] = {...}  (full mapping shown in the source listing above)
ANNOTATORS: Dict[Type[~E], Callable[[sqlglot.optimizer.annotate_types.TypeAnnotator, ~E], ~E]] = {...}  (full mapping shown in the source listing above)
'sqlglot.expressions.Sum'>: <function Dialect.<lambda>>, <class 'sqlglot.expressions.Timestamp'>: <function Dialect.<lambda>>, <class 'sqlglot.expressions.ToMap'>: <function Dialect.<lambda>>, <class 'sqlglot.expressions.TryCast'>: <function Dialect.<lambda>>, <class 'sqlglot.expressions.Unnest'>: <function Dialect.<lambda>>, <class 'sqlglot.expressions.VarMap'>: <function Dialect.<lambda>>}
@classmethod
def get_or_raise( cls, dialect: Union[str, Dialect, Type[Dialect], NoneType]) -> Dialect:
692    @classmethod
693    def get_or_raise(cls, dialect: DialectType) -> Dialect:
694        """
695        Look up a dialect in the global dialect registry and return it if it exists.
696
697        Args:
698            dialect: The target dialect. If this is a string, it can be optionally followed by
699                additional key-value pairs that are separated by commas and are used to specify
700                dialect settings, such as whether the dialect's identifiers are case-sensitive.
701
702        Example:
703            >>> dialect = dialect_class = get_or_raise("duckdb")
704            >>> dialect = get_or_raise("mysql, normalization_strategy = case_sensitive")
705
706        Returns:
707            The corresponding Dialect instance.
708        """
709
710        if not dialect:
711            return cls()
712        if isinstance(dialect, _Dialect):
713            return dialect()
714        if isinstance(dialect, Dialect):
715            return dialect
716        if isinstance(dialect, str):
717            try:
718                dialect_name, *kv_strings = dialect.split(",")
719                kv_pairs = (kv.split("=") for kv in kv_strings)
720                kwargs = {}
721                for pair in kv_pairs:
722                    key = pair[0].strip()
723                    value: t.Union[bool | str | None] = None
724
725                    if len(pair) == 1:
726                        # Default initialize standalone settings to True
727                        value = True
728                    elif len(pair) == 2:
729                        value = pair[1].strip()
730
731                        # Coerce the value to a boolean if it matches one of the truthy/falsy values below
732                        value_lower = value.lower()
733                        if value_lower in ("true", "1"):
734                            value = True
735                        elif value_lower in ("false", "0"):
736                            value = False
737
738                    kwargs[key] = value
739
740            except ValueError:
741                raise ValueError(
742                    f"Invalid dialect format: '{dialect}'. "
743                    "Please use the correct format: 'dialect [, k1 = v1 [, ...]]'."
744                )
745
746            result = cls.get(dialect_name.strip())
747            if not result:
748                from difflib import get_close_matches
749
750                similar = seq_get(get_close_matches(dialect_name, cls.classes, n=1), 0) or ""
751                if similar:
752                    similar = f" Did you mean {similar}?"
753
754                raise ValueError(f"Unknown dialect '{dialect_name}'.{similar}")
755
756            return result(**kwargs)
757
758        raise ValueError(f"Invalid dialect type for '{dialect}': '{type(dialect)}'.")

Look up a dialect in the global dialect registry and return it if it exists.

Arguments:
  • dialect: The target dialect. If this is a string, it can be optionally followed by additional key-value pairs that are separated by commas and are used to specify dialect settings, such as whether the dialect's identifiers are case-sensitive.
Example:
>>> dialect = dialect_class = get_or_raise("duckdb")
>>> dialect = get_or_raise("mysql, normalization_strategy = case_sensitive")
Returns:

The corresponding Dialect instance.

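For illustration, a doctest-style sketch of the settings string format described above (pretend_flag is a made-up setting, shown only to demonstrate that standalone keys default to True):

>>> from sqlglot.dialects.dialect import Dialect, NormalizationStrategy
>>> d = Dialect.get_or_raise("mysql, normalization_strategy = case_sensitive")
>>> d.normalization_strategy is NormalizationStrategy.CASE_SENSITIVE
True
>>> Dialect.get_or_raise("duckdb, pretend_flag").settings
{'pretend_flag': True}

An unknown dialect name raises a ValueError, with a "Did you mean ...?" suggestion when a close match exists.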
@classmethod
def format_time( cls, expression: Union[str, sqlglot.expressions.Expression, NoneType]) -> Optional[sqlglot.expressions.Expression]:
760    @classmethod
761    def format_time(
762        cls, expression: t.Optional[str | exp.Expression]
763    ) -> t.Optional[exp.Expression]:
764        """Converts a time format in this dialect to its equivalent Python `strftime` format."""
765        if isinstance(expression, str):
766            return exp.Literal.string(
767                # the time formats are quoted
768                format_time(expression[1:-1], cls.TIME_MAPPING, cls.TIME_TRIE)
769            )
770
771        if expression and expression.is_string:
772            return exp.Literal.string(format_time(expression.this, cls.TIME_MAPPING, cls.TIME_TRIE))
773
774        return expression

Converts a time format in this dialect to its equivalent Python strftime format.

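A short sketch, assuming Hive's usual time mapping (yyyy -> %Y, MM -> %m, dd -> %d); note that a raw string argument is expected to carry its surrounding quotes, which are stripped before conversion:

>>> from sqlglot.dialects.dialect import Dialect
>>> Dialect.get_or_raise("hive").format_time("'yyyy-MM-dd'").sql()
"'%Y-%m-%d'"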
settings
def normalize_identifier(self, expression: ~E) -> ~E:
794    def normalize_identifier(self, expression: E) -> E:
795        """
796        Transforms an identifier in a way that resembles how it'd be resolved by this dialect.
797
798        For example, an identifier like `FoO` would be resolved as `foo` in Postgres, because it
799        lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so
800        it would resolve it as `FOO`. If it was quoted, it'd need to be treated as case-sensitive,
801        and so any normalization would be prohibited in order to avoid "breaking" the identifier.
802
803        There are also dialects like Spark, which are case-insensitive even when quotes are
804        present, and dialects like MySQL, whose resolution rules match those employed by the
805        underlying operating system, for example they may always be case-sensitive in Linux.
806
807        Finally, the normalization behavior of some engines can even be controlled through flags,
808        like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.
809
810        SQLGlot aims to understand and handle all of these different behaviors gracefully, so
811        that it can analyze queries in the optimizer and successfully capture their semantics.
812        """
813        if (
814            isinstance(expression, exp.Identifier)
815            and self.normalization_strategy is not NormalizationStrategy.CASE_SENSITIVE
816            and (
817                not expression.quoted
818                or self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE
819            )
820        ):
821            expression.set(
822                "this",
823                (
824                    expression.this.upper()
825                    if self.normalization_strategy is NormalizationStrategy.UPPERCASE
826                    else expression.this.lower()
827                ),
828            )
829
830        return expression

Transforms an identifier in a way that resembles how it'd be resolved by this dialect.

For example, an identifier like FoO would be resolved as foo in Postgres, because it lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so it would resolve it as FOO. If it was quoted, it'd need to be treated as case-sensitive, and so any normalization would be prohibited in order to avoid "breaking" the identifier.

There are also dialects like Spark, which are case-insensitive even when quotes are present, and dialects like MySQL, whose resolution rules match those employed by the underlying operating system, for example they may always be case-sensitive in Linux.

Finally, the normalization behavior of some engines can even be controlled through flags, like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.

SQLGlot aims to understand and handle all of these different behaviors gracefully, so that it can analyze queries in the optimizer and successfully capture their semantics.

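The behaviors described above, as a doctest-style sketch:

>>> from sqlglot import exp
>>> from sqlglot.dialects.dialect import Dialect
>>> Dialect.get_or_raise("postgres").normalize_identifier(exp.to_identifier("FoO")).name
'foo'
>>> Dialect.get_or_raise("snowflake").normalize_identifier(exp.to_identifier("FoO")).name
'FOO'
>>> # Quoted identifiers are preserved where quoting implies case-sensitivity:
>>> Dialect.get_or_raise("postgres").normalize_identifier(exp.to_identifier("FoO", quoted=True)).name
'FoO'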
def case_sensitive(self, text: str) -> bool:
832    def case_sensitive(self, text: str) -> bool:
833        """Checks if text contains any case sensitive characters, based on the dialect's rules."""
834        if self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE:
835            return False
836
837        unsafe = (
838            str.islower
839            if self.normalization_strategy is NormalizationStrategy.UPPERCASE
840            else str.isupper
841        )
842        return any(unsafe(char) for char in text)

Checks if text contains any case-sensitive characters, based on the dialect's rules.

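For example, under Postgres' lowercase strategy any uppercase character makes the text unsafe, while under Snowflake's uppercase strategy the reverse holds:

>>> from sqlglot.dialects.dialect import Dialect
>>> Dialect.get_or_raise("postgres").case_sensitive("foo"), Dialect.get_or_raise("postgres").case_sensitive("Foo")
(False, True)
>>> Dialect.get_or_raise("snowflake").case_sensitive("FOO")
False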
def can_identify(self, text: str, identify: str | bool = 'safe') -> bool:
844    def can_identify(self, text: str, identify: str | bool = "safe") -> bool:
845        """Checks if text can be identified given an identify option.
846
847        Args:
848            text: The text to check.
849            identify:
850                `"always"` or `True`: Always returns `True`.
851                `"safe"`: Only returns `True` if the identifier is case-insensitive.
852
853        Returns:
854            Whether the given text can be identified.
855        """
856        if identify is True or identify == "always":
857            return True
858
859        if identify == "safe":
860            return not self.case_sensitive(text)
861
862        return False

Checks if text can be identified given an identify option.

Arguments:
  • text: The text to check.
  • identify: "always" or True: always returns True; "safe": returns True only if the identifier is case-insensitive.
Returns:

Whether the given text can be identified.

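A quick sketch of the identify modes against a lowercase-normalizing dialect:

>>> from sqlglot.dialects.dialect import Dialect
>>> d = Dialect.get_or_raise("postgres")
>>> d.can_identify("Foo", "always"), d.can_identify("Foo", "safe"), d.can_identify("foo", "safe")
(True, False, True)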
def quote_identifier(self, expression: ~E, identify: bool = True) -> ~E:
864    def quote_identifier(self, expression: E, identify: bool = True) -> E:
865        """
866        Adds quotes to a given identifier.
867
868        Args:
869            expression: The expression of interest. If it's not an `Identifier`, this method is a no-op.
870            identify: If set to `False`, the quotes will only be added if the identifier is deemed
871                "unsafe", with respect to its characters and this dialect's normalization strategy.
872        """
873        if isinstance(expression, exp.Identifier) and not isinstance(expression.parent, exp.Func):
874            name = expression.this
875            expression.set(
876                "quoted",
877                identify or self.case_sensitive(name) or not exp.SAFE_IDENTIFIER_RE.match(name),
878            )
879
880        return expression

Adds quotes to a given identifier.

Arguments:
  • expression: The expression of interest. If it's not an Identifier, this method is a no-op.
  • identify: If set to False, the quotes will only be added if the identifier is deemed "unsafe", with respect to its characters and this dialect's normalization strategy.
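A sketch of both modes; with identify=False, quotes are added only when the name is case-sensitive for the dialect or fails the safe-identifier pattern:

>>> from sqlglot import exp
>>> from sqlglot.dialects.dialect import Dialect
>>> d = Dialect.get_or_raise("duckdb")
>>> d.quote_identifier(exp.to_identifier("foo")).sql(dialect="duckdb")
'"foo"'
>>> d.quote_identifier(exp.to_identifier("foo"), identify=False).sql(dialect="duckdb")
'foo'
>>> d.quote_identifier(exp.to_identifier("fo o"), identify=False).sql(dialect="duckdb")
'"fo o"'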
def to_json_path( self, path: Optional[sqlglot.expressions.Expression]) -> Optional[sqlglot.expressions.Expression]:
882    def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
883        if isinstance(path, exp.Literal):
884            path_text = path.name
885            if path.is_number:
886                path_text = f"[{path_text}]"
887            try:
888                return parse_json_path(path_text, self)
889            except ParseError as e:
890                logger.warning(f"Invalid JSON path syntax. {str(e)}")
891
892        return path
def parse(self, sql: str, **opts) -> List[Optional[sqlglot.expressions.Expression]]:
894    def parse(self, sql: str, **opts) -> t.List[t.Optional[exp.Expression]]:
895        return self.parser(**opts).parse(self.tokenize(sql), sql)
def parse_into( self, expression_type: Union[str, Type[sqlglot.expressions.Expression], Collection[Union[str, Type[sqlglot.expressions.Expression]]]], sql: str, **opts) -> List[Optional[sqlglot.expressions.Expression]]:
897    def parse_into(
898        self, expression_type: exp.IntoType, sql: str, **opts
899    ) -> t.List[t.Optional[exp.Expression]]:
900        return self.parser(**opts).parse_into(expression_type, self.tokenize(sql), sql)
def generate( self, expression: sqlglot.expressions.Expression, copy: bool = True, **opts) -> str:
902    def generate(self, expression: exp.Expression, copy: bool = True, **opts) -> str:
903        return self.generator(**opts).generate(expression, copy=copy)
def transpile(self, sql: str, **opts) -> List[str]:
905    def transpile(self, sql: str, **opts) -> t.List[str]:
906        return [
907            self.generate(expression, copy=False, **opts) if expression else ""
908            for expression in self.parse(sql)
909        ]
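These thin wrappers tie the dialect's tokenizer, parser, and generator together; a minimal round trip reads SQL in this dialect and writes it back out (or into another dialect via Expression.sql):

>>> from sqlglot.dialects.dialect import Dialect
>>> d = Dialect.get_or_raise("duckdb")
>>> d.transpile("SELECT 1 AS x")
['SELECT 1 AS x']
>>> [e.sql(dialect="mysql") for e in d.parse("SELECT 1 AS x")]
['SELECT 1 AS x']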
def tokenize(self, sql: str) -> List[sqlglot.tokens.Token]:
911    def tokenize(self, sql: str) -> t.List[Token]:
912        return self.tokenizer.tokenize(sql)
tokenizer: sqlglot.tokens.Tokenizer
914    @property
915    def tokenizer(self) -> Tokenizer:
916        return self.tokenizer_class(dialect=self)
jsonpath_tokenizer: sqlglot.jsonpath.JSONPathTokenizer
918    @property
919    def jsonpath_tokenizer(self) -> JSONPathTokenizer:
920        return self.jsonpath_tokenizer_class(dialect=self)
def parser(self, **opts) -> sqlglot.parser.Parser:
922    def parser(self, **opts) -> Parser:
923        return self.parser_class(dialect=self, **opts)
def generator(self, **opts) -> sqlglot.generator.Generator:
925    def generator(self, **opts) -> Generator:
926        return self.generator_class(dialect=self, **opts)
DialectType = typing.Union[str, Dialect, typing.Type[Dialect], NoneType]
def rename_func( name: str) -> Callable[[sqlglot.generator.Generator, sqlglot.expressions.Expression], str]:
932def rename_func(name: str) -> t.Callable[[Generator, exp.Expression], str]:
933    return lambda self, expression: self.func(name, *flatten(expression.args.values()))
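rename_func is typically installed in a dialect Generator's TRANSFORMS mapping. A hedged sketch of that wiring (HypotheticalDialect is illustrative, not a shipped dialect):

from sqlglot import exp
from sqlglot.dialects.dialect import Dialect, rename_func
from sqlglot.generator import Generator

class HypotheticalDialect(Dialect):
    class Generator(Generator):
        TRANSFORMS = {
            **Generator.TRANSFORMS,
            # Render exp.ApproxDistinct calls as APPROX_COUNT_DISTINCT(...)
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
        }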
def approx_count_distinct_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.ApproxDistinct) -> str:
936def approx_count_distinct_sql(self: Generator, expression: exp.ApproxDistinct) -> str:
937    if expression.args.get("accuracy"):
938        self.unsupported("APPROX_COUNT_DISTINCT does not support accuracy")
939    return self.func("APPROX_COUNT_DISTINCT", expression.this)
def if_sql( name: str = 'IF', false_value: Union[str, sqlglot.expressions.Expression, NoneType] = None) -> Callable[[sqlglot.generator.Generator, sqlglot.expressions.If], str]:
942def if_sql(
943    name: str = "IF", false_value: t.Optional[exp.Expression | str] = None
944) -> t.Callable[[Generator, exp.If], str]:
945    def _if_sql(self: Generator, expression: exp.If) -> str:
946        return self.func(
947            name,
948            expression.this,
949            expression.args.get("true"),
950            expression.args.get("false") or false_value,
951        )
952
953    return _if_sql
def arrow_json_extract_sql( self: sqlglot.generator.Generator, expression: Union[sqlglot.expressions.JSONExtract, sqlglot.expressions.JSONExtractScalar]) -> str:
956def arrow_json_extract_sql(self: Generator, expression: JSON_EXTRACT_TYPE) -> str:
957    this = expression.this
958    if self.JSON_TYPE_REQUIRED_FOR_EXTRACTION and isinstance(this, exp.Literal) and this.is_string:
959        this.replace(exp.cast(this, exp.DataType.Type.JSON))
960
961    return self.binary(expression, "->" if isinstance(expression, exp.JSONExtract) else "->>")
def inline_array_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Array) -> str:
964def inline_array_sql(self: Generator, expression: exp.Array) -> str:
965    return f"[{self.expressions(expression, dynamic=True, new_line=True, skip_first=True, skip_last=True)}]"
def inline_array_unless_query( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Array) -> str:
968def inline_array_unless_query(self: Generator, expression: exp.Array) -> str:
969    elem = seq_get(expression.expressions, 0)
970    if isinstance(elem, exp.Expression) and elem.find(exp.Query):
971        return self.func("ARRAY", elem)
972    return inline_array_sql(self, expression)
def no_ilike_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.ILike) -> str:
975def no_ilike_sql(self: Generator, expression: exp.ILike) -> str:
976    return self.like_sql(
977        exp.Like(
978            this=exp.Lower(this=expression.this), expression=exp.Lower(this=expression.expression)
979        )
980    )
def no_paren_current_date_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.CurrentDate) -> str:
983def no_paren_current_date_sql(self: Generator, expression: exp.CurrentDate) -> str:
984    zone = self.sql(expression, "this")
985    return f"CURRENT_DATE AT TIME ZONE {zone}" if zone else "CURRENT_DATE"
def no_recursive_cte_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.With) -> str:
988def no_recursive_cte_sql(self: Generator, expression: exp.With) -> str:
989    if expression.args.get("recursive"):
990        self.unsupported("Recursive CTEs are unsupported")
991        expression.args["recursive"] = False
992    return self.with_sql(expression)
def no_safe_divide_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.SafeDivide) -> str:
995def no_safe_divide_sql(self: Generator, expression: exp.SafeDivide) -> str:
996    n = self.sql(expression, "this")
997    d = self.sql(expression, "expression")
998    return f"IF(({d}) <> 0, ({n}) / ({d}), NULL)"
def no_tablesample_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.TableSample) -> str:
1001def no_tablesample_sql(self: Generator, expression: exp.TableSample) -> str:
1002    self.unsupported("TABLESAMPLE unsupported")
1003    return self.sql(expression.this)
def no_pivot_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Pivot) -> str:
1006def no_pivot_sql(self: Generator, expression: exp.Pivot) -> str:
1007    self.unsupported("PIVOT unsupported")
1008    return ""
def no_trycast_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.TryCast) -> str:
1011def no_trycast_sql(self: Generator, expression: exp.TryCast) -> str:
1012    return self.cast_sql(expression)
def no_comment_column_constraint_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.CommentColumnConstraint) -> str:
1015def no_comment_column_constraint_sql(
1016    self: Generator, expression: exp.CommentColumnConstraint
1017) -> str:
1018    self.unsupported("CommentColumnConstraint unsupported")
1019    return ""
def no_map_from_entries_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.MapFromEntries) -> str:
1022def no_map_from_entries_sql(self: Generator, expression: exp.MapFromEntries) -> str:
1023    self.unsupported("MAP_FROM_ENTRIES unsupported")
1024    return ""
def str_position_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.StrPosition, generate_instance: bool = False) -> str:
1027def str_position_sql(
1028    self: Generator, expression: exp.StrPosition, generate_instance: bool = False
1029) -> str:
1030    this = self.sql(expression, "this")
1031    substr = self.sql(expression, "substr")
1032    position = self.sql(expression, "position")
1033    instance = expression.args.get("instance") if generate_instance else None
1034    position_offset = ""
1035
1036    if position:
1037        # Normalize third 'pos' argument into 'SUBSTR(..) + offset' across dialects
1038        this = self.func("SUBSTR", this, position)
1039        position_offset = f" + {position} - 1"
1040
1041    return self.func("STRPOS", this, substr, instance) + position_offset
def struct_extract_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.StructExtract) -> str:
1044def struct_extract_sql(self: Generator, expression: exp.StructExtract) -> str:
1045    return (
1046        f"{self.sql(expression, 'this')}.{self.sql(exp.to_identifier(expression.expression.name))}"
1047    )
def var_map_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Map | sqlglot.expressions.VarMap, map_func_name: str = 'MAP') -> str:
1050def var_map_sql(
1051    self: Generator, expression: exp.Map | exp.VarMap, map_func_name: str = "MAP"
1052) -> str:
1053    keys = expression.args["keys"]
1054    values = expression.args["values"]
1055
1056    if not isinstance(keys, exp.Array) or not isinstance(values, exp.Array):
1057        self.unsupported("Cannot convert array columns into map.")
1058        return self.func(map_func_name, keys, values)
1059
1060    args = []
1061    for key, value in zip(keys.expressions, values.expressions):
1062        args.append(self.sql(key))
1063        args.append(self.sql(value))
1064
1065    return self.func(map_func_name, *args)
def build_formatted_time( exp_class: Type[~E], dialect: str, default: Union[str, bool, NoneType] = None) -> Callable[[List], ~E]:
1068def build_formatted_time(
1069    exp_class: t.Type[E], dialect: str, default: t.Optional[bool | str] = None
1070) -> t.Callable[[t.List], E]:
1071    """Helper used for time expressions.
1072
1073    Args:
1074        exp_class: the expression class to instantiate.
1075        dialect: target sql dialect.
1076        default: the default format, True being time.
1077
1078    Returns:
1079        A callable that can be used to return the appropriately formatted time expression.
1080    """
1081
1082    def _builder(args: t.List):
1083        return exp_class(
1084            this=seq_get(args, 0),
1085            format=Dialect[dialect].format_time(
1086                seq_get(args, 1)
1087                or (Dialect[dialect].TIME_FORMAT if default is True else default or None)
1088            ),
1089        )
1090
1091    return _builder

Helper used for time expressions.

Arguments:
  • exp_class: the expression class to instantiate.
  • dialect: target sql dialect.
  • default: the default format; if True, the dialect's default TIME_FORMAT is used.
Returns:

A callable that can be used to return the appropriately formatted time expression.

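A sketch of typical wiring (the TO_TIMESTAMP name is illustrative): the builder becomes a parser FUNCTIONS entry that converts the SQL-level format string through the named dialect's time mapping:

from sqlglot import exp
from sqlglot.dialects.dialect import build_formatted_time

# Build exp.StrToTime nodes from TO_TIMESTAMP(value, format) argument lists,
# falling back to the hive dialect's default TIME_FORMAT when no format is given.
build_str_to_time = build_formatted_time(exp.StrToTime, "hive", default=True)
# e.g. inside a Parser subclass:
#     FUNCTIONS = {**parser.Parser.FUNCTIONS, "TO_TIMESTAMP": build_str_to_time}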
def time_format( dialect: Union[str, Dialect, Type[Dialect], NoneType] = None) -> Callable[[sqlglot.generator.Generator, sqlglot.expressions.UnixToStr | sqlglot.expressions.StrToUnix], Optional[str]]:
1094def time_format(
1095    dialect: DialectType = None,
1096) -> t.Callable[[Generator, exp.UnixToStr | exp.StrToUnix], t.Optional[str]]:
1097    def _time_format(self: Generator, expression: exp.UnixToStr | exp.StrToUnix) -> t.Optional[str]:
1098        """
1099        Returns the time format for a given expression, unless it's equivalent
1100        to the default time format of the dialect of interest.
1101        """
1102        time_format = self.format_time(expression)
1103        return time_format if time_format != Dialect.get_or_raise(dialect).TIME_FORMAT else None
1104
1105    return _time_format
def build_date_delta( exp_class: Type[~E], unit_mapping: Optional[Dict[str, str]] = None, default_unit: Optional[str] = 'DAY') -> Callable[[List], ~E]:
1108def build_date_delta(
1109    exp_class: t.Type[E],
1110    unit_mapping: t.Optional[t.Dict[str, str]] = None,
1111    default_unit: t.Optional[str] = "DAY",
1112) -> t.Callable[[t.List], E]:
1113    def _builder(args: t.List) -> E:
1114        unit_based = len(args) == 3
1115        this = args[2] if unit_based else seq_get(args, 0)
1116        unit = None
1117        if unit_based or default_unit:
1118            unit = args[0] if unit_based else exp.Literal.string(default_unit)
1119            unit = exp.var(unit_mapping.get(unit.name.lower(), unit.name)) if unit_mapping else unit
1120        return exp_class(this=this, expression=seq_get(args, 1), unit=unit)
1121
1122    return _builder
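Illustrative only: calling the builder the way a parser would, with the unit-first three-argument form (e.g. DATE_ADD(day, 1, x)):

from sqlglot import exp
from sqlglot.dialects.dialect import build_date_delta

builder = build_date_delta(exp.DateAdd)
node = builder([exp.var("day"), exp.Literal.number(1), exp.column("x")])
# node is exp.DateAdd(this=x, expression=1, unit=day); with only two arguments
# the unit falls back to default_unit ("DAY").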
def build_date_delta_with_interval(expression_class: Type[~E]) -> Callable[[List], Optional[~E]]:
1125def build_date_delta_with_interval(
1126    expression_class: t.Type[E],
1127) -> t.Callable[[t.List], t.Optional[E]]:
1128    def _builder(args: t.List) -> t.Optional[E]:
1129        if len(args) < 2:
1130            return None
1131
1132        interval = args[1]
1133
1134        if not isinstance(interval, exp.Interval):
1135            raise ParseError(f"INTERVAL expression expected but got '{interval}'")
1136
1137        expression = interval.this
1138        if expression and expression.is_string:
1139            expression = exp.Literal.number(expression.this)
1140
1141        return expression_class(this=args[0], expression=expression, unit=unit_to_str(interval))
1142
1143    return _builder
def date_trunc_to_time( args: List) -> sqlglot.expressions.DateTrunc | sqlglot.expressions.TimestampTrunc:
1146def date_trunc_to_time(args: t.List) -> exp.DateTrunc | exp.TimestampTrunc:
1147    unit = seq_get(args, 0)
1148    this = seq_get(args, 1)
1149
1150    if isinstance(this, exp.Cast) and this.is_type("date"):
1151        return exp.DateTrunc(unit=unit, this=this)
1152    return exp.TimestampTrunc(this=this, unit=unit)
def date_add_interval_sql( data_type: str, kind: str) -> Callable[[sqlglot.generator.Generator, sqlglot.expressions.Expression], str]:
1155def date_add_interval_sql(
1156    data_type: str, kind: str
1157) -> t.Callable[[Generator, exp.Expression], str]:
1158    def func(self: Generator, expression: exp.Expression) -> str:
1159        this = self.sql(expression, "this")
1160        interval = exp.Interval(this=expression.expression, unit=unit_to_var(expression))
1161        return f"{data_type}_{kind}({this}, {self.sql(interval)})"
1162
1163    return func
def timestamptrunc_sql( zone: bool = False) -> Callable[[sqlglot.generator.Generator, sqlglot.expressions.TimestampTrunc], str]:
1166def timestamptrunc_sql(zone: bool = False) -> t.Callable[[Generator, exp.TimestampTrunc], str]:
1167    def _timestamptrunc_sql(self: Generator, expression: exp.TimestampTrunc) -> str:
1168        args = [unit_to_str(expression), expression.this]
1169        if zone:
1170            args.append(expression.args.get("zone"))
1171        return self.func("DATE_TRUNC", *args)
1172
1173    return _timestamptrunc_sql
def no_timestamp_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Timestamp) -> str:
1176def no_timestamp_sql(self: Generator, expression: exp.Timestamp) -> str:
1177    zone = expression.args.get("zone")
1178    if not zone:
1179        from sqlglot.optimizer.annotate_types import annotate_types
1180
1181        target_type = annotate_types(expression).type or exp.DataType.Type.TIMESTAMP
1182        return self.sql(exp.cast(expression.this, target_type))
1183    if zone.name.lower() in TIMEZONES:
1184        return self.sql(
1185            exp.AtTimeZone(
1186                this=exp.cast(expression.this, exp.DataType.Type.TIMESTAMP),
1187                zone=zone,
1188            )
1189        )
1190    return self.func("TIMESTAMP", expression.this, zone)
def no_time_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Time) -> str:
1193def no_time_sql(self: Generator, expression: exp.Time) -> str:
1194    # Transpile BQ's TIME(timestamp, zone) to CAST(TIMESTAMPTZ <timestamp> AT TIME ZONE <zone> AS TIME)
1195    this = exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ)
1196    expr = exp.cast(
1197        exp.AtTimeZone(this=this, zone=expression.args.get("zone")), exp.DataType.Type.TIME
1198    )
1199    return self.sql(expr)
def no_datetime_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Datetime) -> str:
1202def no_datetime_sql(self: Generator, expression: exp.Datetime) -> str:
1203    this = expression.this
1204    expr = expression.expression
1205
1206    if expr.name.lower() in TIMEZONES:
1207        # Transpile BQ's DATETIME(timestamp, zone) to CAST(TIMESTAMPTZ <timestamp> AT TIME ZONE <zone> AS TIMESTAMP)
1208        this = exp.cast(this, exp.DataType.Type.TIMESTAMPTZ)
1209        this = exp.cast(exp.AtTimeZone(this=this, zone=expr), exp.DataType.Type.TIMESTAMP)
1210        return self.sql(this)
1211
1212    this = exp.cast(this, exp.DataType.Type.DATE)
1213    expr = exp.cast(expr, exp.DataType.Type.TIME)
1214
1215    return self.sql(exp.cast(exp.Add(this=this, expression=expr), exp.DataType.Type.TIMESTAMP))
def locate_to_strposition(args: List) -> sqlglot.expressions.Expression:
1218def locate_to_strposition(args: t.List) -> exp.Expression:
1219    return exp.StrPosition(
1220        this=seq_get(args, 1), substr=seq_get(args, 0), position=seq_get(args, 2)
1221    )
def strposition_to_locate_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.StrPosition) -> str:
1224def strposition_to_locate_sql(self: Generator, expression: exp.StrPosition) -> str:
1225    return self.func(
1226        "LOCATE", expression.args.get("substr"), expression.this, expression.args.get("position")
1227    )
def left_to_substring_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Left) -> str:
1230def left_to_substring_sql(self: Generator, expression: exp.Left) -> str:
1231    return self.sql(
1232        exp.Substring(
1233            this=expression.this, start=exp.Literal.number(1), length=expression.expression
1234        )
1235    )
def right_to_substring_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Left) -> str:
1238def right_to_substring_sql(self: Generator, expression: exp.Left) -> str:
1239    return self.sql(
1240        exp.Substring(
1241            this=expression.this,
1242            start=exp.Length(this=expression.this) - exp.paren(expression.expression - 1),
1243        )
1244    )
def timestrtotime_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.TimeStrToTime) -> str:
1247def timestrtotime_sql(self: Generator, expression: exp.TimeStrToTime) -> str:
1248    datatype = (
1249        exp.DataType.Type.TIMESTAMPTZ
1250        if expression.args.get("zone")
1251        else exp.DataType.Type.TIMESTAMP
1252    )
1253
1254    return self.sql(exp.cast(expression.this, datatype, dialect=self.dialect))
def datestrtodate_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.DateStrToDate) -> str:
1257def datestrtodate_sql(self: Generator, expression: exp.DateStrToDate) -> str:
1258    return self.sql(exp.cast(expression.this, exp.DataType.Type.DATE))
def encode_decode_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Expression, name: str, replace: bool = True) -> str:
1262def encode_decode_sql(
1263    self: Generator, expression: exp.Expression, name: str, replace: bool = True
1264) -> str:
1265    charset = expression.args.get("charset")
1266    if charset and charset.name.lower() != "utf-8":
1267        self.unsupported(f"Expected utf-8 character set, got {charset}.")
1268
1269    return self.func(name, expression.this, expression.args.get("replace") if replace else None)
def min_or_least( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Min) -> str:
1272def min_or_least(self: Generator, expression: exp.Min) -> str:
1273    name = "LEAST" if expression.expressions else "MIN"
1274    return rename_func(name)(self, expression)
def max_or_greatest( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Max) -> str:
1277def max_or_greatest(self: Generator, expression: exp.Max) -> str:
1278    name = "GREATEST" if expression.expressions else "MAX"
1279    return rename_func(name)(self, expression)
def count_if_to_sum( self: sqlglot.generator.Generator, expression: sqlglot.expressions.CountIf) -> str:
1282def count_if_to_sum(self: Generator, expression: exp.CountIf) -> str:
1283    cond = expression.this
1284
1285    if isinstance(expression.this, exp.Distinct):
1286        cond = expression.this.expressions[0]
1287        self.unsupported("DISTINCT is not supported when converting COUNT_IF to SUM")
1288
1289    return self.func("sum", exp.func("if", cond, 1, 0))
def trim_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Trim) -> str:
1292def trim_sql(self: Generator, expression: exp.Trim) -> str:
1293    target = self.sql(expression, "this")
1294    trim_type = self.sql(expression, "position")
1295    remove_chars = self.sql(expression, "expression")
1296    collation = self.sql(expression, "collation")
1297
1298    # Use TRIM/LTRIM/RTRIM syntax if the expression isn't database-specific
1299    if not remove_chars:
1300        return self.trim_sql(expression)
1301
1302    trim_type = f"{trim_type} " if trim_type else ""
1303    remove_chars = f"{remove_chars} " if remove_chars else ""
1304    from_part = "FROM " if trim_type or remove_chars else ""
1305    collation = f" COLLATE {collation}" if collation else ""
1306    return f"TRIM({trim_type}{remove_chars}{from_part}{target}{collation})"
def str_to_time_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Expression) -> str:
1309def str_to_time_sql(self: Generator, expression: exp.Expression) -> str:
1310    return self.func("STRPTIME", expression.this, self.format_time(expression))
def concat_to_dpipe_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Concat) -> str:
1313def concat_to_dpipe_sql(self: Generator, expression: exp.Concat) -> str:
1314    return self.sql(reduce(lambda x, y: exp.DPipe(this=x, expression=y), expression.expressions))
def concat_ws_to_dpipe_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.ConcatWs) -> str:
1317def concat_ws_to_dpipe_sql(self: Generator, expression: exp.ConcatWs) -> str:
1318    delim, *rest_args = expression.expressions
1319    return self.sql(
1320        reduce(
1321            lambda x, y: exp.DPipe(this=x, expression=exp.DPipe(this=delim, expression=y)),
1322            rest_args,
1323        )
1324    )
def regexp_extract_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.RegexpExtract) -> str:
1327def regexp_extract_sql(self: Generator, expression: exp.RegexpExtract) -> str:
1328    bad_args = list(filter(expression.args.get, ("position", "occurrence", "parameters")))
1329    if bad_args:
1330        self.unsupported(f"REGEXP_EXTRACT does not support the following arg(s): {bad_args}")
1331
1332    return self.func(
1333        "REGEXP_EXTRACT", expression.this, expression.expression, expression.args.get("group")
1334    )
def regexp_replace_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.RegexpReplace) -> str:
1337def regexp_replace_sql(self: Generator, expression: exp.RegexpReplace) -> str:
1338    bad_args = list(filter(expression.args.get, ("position", "occurrence", "modifiers")))
1339    if bad_args:
1340        self.unsupported(f"REGEXP_REPLACE does not support the following arg(s): {bad_args}")
1341
1342    return self.func(
1343        "REGEXP_REPLACE", expression.this, expression.expression, expression.args["replacement"]
1344    )
def pivot_column_names( aggregations: List[sqlglot.expressions.Expression], dialect: Union[str, Dialect, Type[Dialect], NoneType]) -> List[str]:
1347def pivot_column_names(aggregations: t.List[exp.Expression], dialect: DialectType) -> t.List[str]:
1348    names = []
1349    for agg in aggregations:
1350        if isinstance(agg, exp.Alias):
1351            names.append(agg.alias)
1352        else:
1353            """
1354            This case corresponds to aggregations without aliases being used as suffixes
1355            (e.g. col_avg(foo)). We need to unquote identifiers because they're going to
1356            be quoted in the base parser's `_parse_pivot` method, due to `to_identifier`.
1357            Otherwise, we'd end up with `col_avg(`foo`)` (notice the double quotes).
1358            """
1359            agg_all_unquoted = agg.transform(
1360                lambda node: (
1361                    exp.Identifier(this=node.name, quoted=False)
1362                    if isinstance(node, exp.Identifier)
1363                    else node
1364                )
1365            )
1366            names.append(agg_all_unquoted.sql(dialect=dialect, normalize_functions="lower"))
1367
1368    return names
def binary_from_function(expr_type: Type[~B]) -> Callable[[List], ~B]:
1371def binary_from_function(expr_type: t.Type[B]) -> t.Callable[[t.List], B]:
1372    return lambda args: expr_type(this=seq_get(args, 0), expression=seq_get(args, 1))
def build_timestamp_trunc(args: List) -> sqlglot.expressions.TimestampTrunc:
1376def build_timestamp_trunc(args: t.List) -> exp.TimestampTrunc:
1377    return exp.TimestampTrunc(this=seq_get(args, 1), unit=seq_get(args, 0))
def any_value_to_max_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.AnyValue) -> str:
1380def any_value_to_max_sql(self: Generator, expression: exp.AnyValue) -> str:
1381    return self.func("MAX", expression.this)
def bool_xor_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Xor) -> str:
1384def bool_xor_sql(self: Generator, expression: exp.Xor) -> str:
1385    a = self.sql(expression.left)
1386    b = self.sql(expression.right)
1387    return f"({a} AND (NOT {b})) OR ((NOT {a}) AND {b})"
def is_parse_json(expression: sqlglot.expressions.Expression) -> bool:
1390def is_parse_json(expression: exp.Expression) -> bool:
1391    return isinstance(expression, exp.ParseJSON) or (
1392        isinstance(expression, exp.Cast) and expression.is_type("json")
1393    )
def isnull_to_is_null(args: List) -> sqlglot.expressions.Expression:
1396def isnull_to_is_null(args: t.List) -> exp.Expression:
1397    return exp.Paren(this=exp.Is(this=seq_get(args, 0), expression=exp.null()))
def generatedasidentitycolumnconstraint_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.GeneratedAsIdentityColumnConstraint) -> str:
1400def generatedasidentitycolumnconstraint_sql(
1401    self: Generator, expression: exp.GeneratedAsIdentityColumnConstraint
1402) -> str:
1403    start = self.sql(expression, "start") or "1"
1404    increment = self.sql(expression, "increment") or "1"
1405    return f"IDENTITY({start}, {increment})"
def arg_max_or_min_no_count( name: str) -> Callable[[sqlglot.generator.Generator, sqlglot.expressions.ArgMax | sqlglot.expressions.ArgMin], str]:
1408def arg_max_or_min_no_count(name: str) -> t.Callable[[Generator, exp.ArgMax | exp.ArgMin], str]:
1409    def _arg_max_or_min_sql(self: Generator, expression: exp.ArgMax | exp.ArgMin) -> str:
1410        if expression.args.get("count"):
1411            self.unsupported(f"Only two arguments are supported in function {name}.")
1412
1413        return self.func(name, expression.this, expression.expression)
1414
1415    return _arg_max_or_min_sql
def ts_or_ds_add_cast( expression: sqlglot.expressions.TsOrDsAdd) -> sqlglot.expressions.TsOrDsAdd:
1418def ts_or_ds_add_cast(expression: exp.TsOrDsAdd) -> exp.TsOrDsAdd:
1419    this = expression.this.copy()
1420
1421    return_type = expression.return_type
1422    if return_type.is_type(exp.DataType.Type.DATE):
1423        # If we need to cast to a DATE, we cast to TIMESTAMP first to make sure we
1424        # can truncate timestamp strings, because some dialects can't cast them to DATE
1425        this = exp.cast(this, exp.DataType.Type.TIMESTAMP)
1426
1427    expression.this.replace(exp.cast(this, return_type))
1428    return expression
def date_delta_sql( name: str, cast: bool = False) -> Callable[[sqlglot.generator.Generator, Union[sqlglot.expressions.DateAdd, sqlglot.expressions.TsOrDsAdd, sqlglot.expressions.DateDiff, sqlglot.expressions.TsOrDsDiff]], str]:
1431def date_delta_sql(name: str, cast: bool = False) -> t.Callable[[Generator, DATE_ADD_OR_DIFF], str]:
1432    def _delta_sql(self: Generator, expression: DATE_ADD_OR_DIFF) -> str:
1433        if cast and isinstance(expression, exp.TsOrDsAdd):
1434            expression = ts_or_ds_add_cast(expression)
1435
1436        return self.func(
1437            name,
1438            unit_to_var(expression),
1439            expression.expression,
1440            expression.this,
1441        )
1442
1443    return _delta_sql
def unit_to_str( expression: sqlglot.expressions.Expression, default: str = 'DAY') -> Optional[sqlglot.expressions.Expression]:
1446def unit_to_str(expression: exp.Expression, default: str = "DAY") -> t.Optional[exp.Expression]:
1447    unit = expression.args.get("unit")
1448
1449    if isinstance(unit, exp.Placeholder):
1450        return unit
1451    if unit:
1452        return exp.Literal.string(unit.name)
1453    return exp.Literal.string(default) if default else None
def unit_to_var( expression: sqlglot.expressions.Expression, default: str = 'DAY') -> Optional[sqlglot.expressions.Expression]:
1456def unit_to_var(expression: exp.Expression, default: str = "DAY") -> t.Optional[exp.Expression]:
1457    unit = expression.args.get("unit")
1458
1459    if isinstance(unit, (exp.Var, exp.Placeholder)):
1460        return unit
1461    return exp.Var(this=default) if default else None
def map_date_part( part, dialect: Union[str, Dialect, Type[Dialect], NoneType] = Dialect):
1476def map_date_part(part, dialect: DialectType = Dialect):
1477    mapped = (
1478        Dialect.get_or_raise(dialect).DATE_PART_MAPPING.get(part.name.upper()) if part else None
1479    )
1480    return exp.var(mapped) if mapped else part
def no_last_day_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.LastDay) -> str:
1483def no_last_day_sql(self: Generator, expression: exp.LastDay) -> str:
1484    trunc_curr_date = exp.func("date_trunc", "month", expression.this)
1485    plus_one_month = exp.func("date_add", trunc_curr_date, 1, "month")
1486    minus_one_day = exp.func("date_sub", plus_one_month, 1, "day")
1487
1488    return self.sql(exp.cast(minus_one_day, exp.DataType.Type.DATE))
def merge_without_target_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Merge) -> str:
1491def merge_without_target_sql(self: Generator, expression: exp.Merge) -> str:
1492    """Remove table refs from columns in when statements."""
1493    alias = expression.this.args.get("alias")
1494
1495    def normalize(identifier: t.Optional[exp.Identifier]) -> t.Optional[str]:
1496        return self.dialect.normalize_identifier(identifier).name if identifier else None
1497
1498    targets = {normalize(expression.this.this)}
1499
1500    if alias:
1501        targets.add(normalize(alias.this))
1502
1503    for when in expression.expressions:
1504        # only remove the target names from the THEN clause
1505        # they're still valid in the <condition> part of WHEN MATCHED / WHEN NOT MATCHED
1506        # ref: https://github.com/TobikoData/sqlmesh/issues/2934
1507        then = when.args.get("then")
1508        if then:
1509            then.transform(
1510                lambda node: (
1511                    exp.column(node.this)
1512                    if isinstance(node, exp.Column) and normalize(node.args.get("table")) in targets
1513                    else node
1514                ),
1515                copy=False,
1516            )
1517
1518    return self.merge_sql(expression)

Remove table refs from columns in when statements.

def build_json_extract_path( expr_type: Type[~F], zero_based_indexing: bool = True, arrow_req_json_type: bool = False) -> Callable[[List], ~F]:
1521def build_json_extract_path(
1522    expr_type: t.Type[F], zero_based_indexing: bool = True, arrow_req_json_type: bool = False
1523) -> t.Callable[[t.List], F]:
1524    def _builder(args: t.List) -> F:
1525        segments: t.List[exp.JSONPathPart] = [exp.JSONPathRoot()]
1526        for arg in args[1:]:
1527            if not isinstance(arg, exp.Literal):
1528                # We use the fallback parser because we can't really transpile non-literals safely
1529                return expr_type.from_arg_list(args)
1530
1531            text = arg.name
1532            if is_int(text):
1533                index = int(text)
1534                segments.append(
1535                    exp.JSONPathSubscript(this=index if zero_based_indexing else index - 1)
1536                )
1537            else:
1538                segments.append(exp.JSONPathKey(this=text))
1539
1540        # This is done to avoid failing in the expression validator due to the arg count
1541        del args[2:]
1542        return expr_type(
1543            this=seq_get(args, 0),
1544            expression=exp.JSONPath(expressions=segments),
1545            only_json_types=arrow_req_json_type,
1546        )
1547
1548    return _builder
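A sketch: literal path arguments (e.g. JSON_EXTRACT(col, 'a', 0)) are folded into a single JSONPath expression:

from sqlglot import exp
from sqlglot.dialects.dialect import build_json_extract_path

builder = build_json_extract_path(exp.JSONExtract)
node = builder([exp.column("col"), exp.Literal.string("a"), exp.Literal.number(0)])
print(node.expression.sql())  # expected: $.a[0] (zero_based_indexing=True is the default)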
def json_extract_segments( name: str, quoted_index: bool = True, op: Optional[str] = None) -> Callable[[sqlglot.generator.Generator, Union[sqlglot.expressions.JSONExtract, sqlglot.expressions.JSONExtractScalar]], str]:
1551def json_extract_segments(
1552    name: str, quoted_index: bool = True, op: t.Optional[str] = None
1553) -> t.Callable[[Generator, JSON_EXTRACT_TYPE], str]:
1554    def _json_extract_segments(self: Generator, expression: JSON_EXTRACT_TYPE) -> str:
1555        path = expression.expression
1556        if not isinstance(path, exp.JSONPath):
1557            return rename_func(name)(self, expression)
1558
1559        segments = []
1560        for segment in path.expressions:
1561            path = self.sql(segment)
1562            if path:
1563                if isinstance(segment, exp.JSONPathPart) and (
1564                    quoted_index or not isinstance(segment, exp.JSONPathSubscript)
1565                ):
1566                    path = f"{self.dialect.QUOTE_START}{path}{self.dialect.QUOTE_END}"
1567
1568                segments.append(path)
1569
1570        if op:
1571            return f" {op} ".join([self.sql(expression.this), *segments])
1572        return self.func(name, expression.this, *segments)
1573
1574    return _json_extract_segments
def json_path_key_only_name( self: sqlglot.generator.Generator, expression: sqlglot.expressions.JSONPathKey) -> str:
1577def json_path_key_only_name(self: Generator, expression: exp.JSONPathKey) -> str:
1578    if isinstance(expression.this, exp.JSONPathWildcard):
1579        self.unsupported("Unsupported wildcard in JSONPathKey expression")
1580
1581    return expression.name
def filter_array_using_unnest( self: sqlglot.generator.Generator, expression: sqlglot.expressions.ArrayFilter) -> str:
1584def filter_array_using_unnest(self: Generator, expression: exp.ArrayFilter) -> str:
1585    cond = expression.expression
1586    if isinstance(cond, exp.Lambda) and len(cond.expressions) == 1:
1587        alias = cond.expressions[0]
1588        cond = cond.this
1589    elif isinstance(cond, exp.Predicate):
1590        alias = "_u"
1591    else:
1592        self.unsupported("Unsupported filter condition")
1593        return ""
1594
1595    unnest = exp.Unnest(expressions=[expression.this])
1596    filtered = exp.select(alias).from_(exp.alias_(unnest, None, table=[alias])).where(cond)
1597    return self.sql(exp.Array(expressions=[filtered]))
def to_number_with_nls_param( self: sqlglot.generator.Generator, expression: sqlglot.expressions.ToNumber) -> str:
1600def to_number_with_nls_param(self: Generator, expression: exp.ToNumber) -> str:
1601    return self.func(
1602        "TO_NUMBER",
1603        expression.this,
1604        expression.args.get("format"),
1605        expression.args.get("nlsparam"),
1606    )
def build_default_decimal_type( precision: Optional[int] = None, scale: Optional[int] = None) -> Callable[[sqlglot.expressions.DataType], sqlglot.expressions.DataType]:
1609def build_default_decimal_type(
1610    precision: t.Optional[int] = None, scale: t.Optional[int] = None
1611) -> t.Callable[[exp.DataType], exp.DataType]:
1612    def _builder(dtype: exp.DataType) -> exp.DataType:
1613        if dtype.expressions or precision is None:
1614            return dtype
1615
1616        params = f"{precision}{f', {scale}' if scale is not None else ''}"
1617        return exp.DataType.build(f"DECIMAL({params})")
1618
1619    return _builder
def build_timestamp_from_parts(args: List) -> sqlglot.expressions.Func:
1622def build_timestamp_from_parts(args: t.List) -> exp.Func:
1623    if len(args) == 2:
1624        # Other dialects don't have the TIMESTAMP_FROM_PARTS(date, time) concept,
1625        # so we parse this into Anonymous for now instead of introducing complexity
1626        return exp.Anonymous(this="TIMESTAMP_FROM_PARTS", expressions=args)
1627
1628    return exp.TimestampFromParts.from_arg_list(args)
def sha256_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.SHA2) -> str:
1631def sha256_sql(self: Generator, expression: exp.SHA2) -> str:
1632    return self.func(f"SHA{expression.text('length') or '256'}", expression.this)
def sequence_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.GenerateSeries | sqlglot.expressions.GenerateDateArray) -> str:
1635def sequence_sql(self: Generator, expression: exp.GenerateSeries | exp.GenerateDateArray) -> str:
1636    start = expression.args.get("start")
1637    end = expression.args.get("end")
1638    step = expression.args.get("step")
1639
1640    if isinstance(start, exp.Cast):
1641        target_type = start.to
1642    elif isinstance(end, exp.Cast):
1643        target_type = end.to
1644    else:
1645        target_type = None
1646
1647    if start and end and target_type and target_type.is_type("date", "timestamp"):
1648        if isinstance(start, exp.Cast) and target_type is start.to:
1649            end = exp.cast(end, target_type)
1650        else:
1651            start = exp.cast(start, target_type)
1652
1653    return self.func("SEQUENCE", start, end, step)