1
0
Fork 0
sqlglot/sqlglot/dialects/doris.py
Daniel Baumann 021892b3ff
Merging upstream version 25.0.3.
Signed-off-by: Daniel Baumann <daniel@debian.org>
2025-02-13 21:37:40 +01:00

548 lines
14 KiB
Python

from __future__ import annotations
from sqlglot import exp
from sqlglot.dialects.dialect import (
approx_count_distinct_sql,
build_timestamp_trunc,
rename_func,
time_format,
unit_to_str,
)
from sqlglot.dialects.mysql import MySQL
class Doris(MySQL):
    """Doris dialect, layered on top of the MySQL dialect.

    Anything not overridden below is inherited unchanged from ``MySQL``.
    """

    # Format strings for this dialect; note the values carry their own single quotes.
    DATE_FORMAT = "'yyyy-MM-dd'"
    DATEINT_FORMAT = "'yyyyMMdd'"
    TIME_FORMAT = "'yyyy-MM-dd HH:mm:ss'"

    class Parser(MySQL.Parser):
        """MySQL parser extended with extra function-name mappings."""

        # Function names mapped to expression builders, on top of the MySQL table.
        FUNCTIONS = {
            **MySQL.Parser.FUNCTIONS,
            "COLLECT_SET": exp.ArrayUniqueAgg.from_arg_list,
            "DATE_TRUNC": build_timestamp_trunc,
            "MONTHS_ADD": exp.AddMonths.from_arg_list,
            "REGEXP": exp.RegexpLike.from_arg_list,
            "TO_DATE": exp.TsOrDsToDate.from_arg_list,
        }

        # Work on a copy so the MySQL table is left untouched, then remove the
        # GROUP_CONCAT entry inherited from MySQL.
        # NOTE(review): presumably so GROUP_CONCAT is parsed like an ordinary function
        # call; confirm against MySQL.Parser.
        FUNCTION_PARSERS = MySQL.Parser.FUNCTION_PARSERS.copy()
        del FUNCTION_PARSERS["GROUP_CONCAT"]

    class Generator(MySQL.Generator):
        """MySQL generator with overridden type names and function renderings."""

        LAST_DAY_SUPPORTS_DATE_PART = False

        # Type names emitted in place of the MySQL ones.
        TYPE_MAPPING = {
            **MySQL.Generator.TYPE_MAPPING,
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIMESTAMP: "DATETIME",
            exp.DataType.Type.TIMESTAMPTZ: "DATETIME",
        }

        # Both are deliberately empty.
        # NOTE(review): presumably this discards entries defined by MySQL.Generator;
        # confirm against that class.
        CAST_MAPPING = {}
        TIMESTAMP_FUNC_TYPES = set()

        # Expression type -> callable(generator, expression) returning the SQL text.
        TRANSFORMS = {
            **MySQL.Generator.TRANSFORMS,
            exp.AddMonths: rename_func("MONTHS_ADD"),
            exp.ApproxDistinct: approx_count_distinct_sql,
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.ArrayAgg: rename_func("COLLECT_LIST"),
            exp.ArrayUniqueAgg: rename_func("COLLECT_SET"),
            exp.CurrentTimestamp: lambda self, _: self.func("NOW"),
            exp.DateTrunc: lambda self, node: self.func(
                "DATE_TRUNC", node.this, unit_to_str(node)
            ),
            # A separator is always emitted; "," is used when the expression has none.
            exp.GroupConcat: lambda self, node: self.func(
                "GROUP_CONCAT",
                node.this,
                node.args.get("separator") or exp.Literal.string(","),
            ),
            exp.JSONExtractScalar: lambda self, node: self.func(
                "JSON_EXTRACT", node.this, node.expression
            ),
            exp.Map: rename_func("ARRAY_MAP"),
            exp.RegexpLike: rename_func("REGEXP"),
            exp.RegexpSplit: rename_func("SPLIT_BY_STRING"),
            exp.Split: rename_func("SPLIT_BY_STRING"),
            exp.StringToArray: rename_func("SPLIT_BY_STRING"),
            exp.StrToUnix: lambda self, node: self.func(
                "UNIX_TIMESTAMP", node.this, self.format_time(node)
            ),
            exp.TimeStrToDate: rename_func("TO_DATE"),
            exp.TsOrDsAdd: lambda self, node: self.func("DATE_ADD", node.this, node.expression),
            exp.TsOrDsToDate: lambda self, node: self.func("TO_DATE", node.this),
            exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"),
            exp.TimestampTrunc: lambda self, node: self.func(
                "DATE_TRUNC", node.this, unit_to_str(node)
            ),
            exp.UnixToStr: lambda self, node: self.func(
                "FROM_UNIXTIME", node.this, time_format("doris")(self, node)
            ),
            exp.UnixToTime: rename_func("FROM_UNIXTIME"),
        }

        # Lower-cased keyword list; the link below is the lexer grammar it refers to.
        # https://github.com/apache/doris/blob/e4f41dbf1ec03f5937fdeba2ee1454a20254015b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4#L93
        RESERVED_KEYWORDS = {
            "account_lock",
            "account_unlock",
            "add",
            "adddate",
            "admin",
            "after",
            "agg_state",
            "aggregate",
            "alias",
            "all",
            "alter",
            "analyze",
            "analyzed",
            "and",
            "anti",
            "append",
            "array",
            "array_range",
            "as",
            "asc",
            "at",
            "authors",
            "auto",
            "auto_increment",
            "backend",
            "backends",
            "backup",
            "begin",
            "belong",
            "between",
            "bigint",
            "bin",
            "binary",
            "binlog",
            "bitand",
            "bitmap",
            "bitmap_union",
            "bitor",
            "bitxor",
            "blob",
            "boolean",
            "brief",
            "broker",
            "buckets",
            "build",
            "builtin",
            "bulk",
            "by",
            "cached",
            "call",
            "cancel",
            "case",
            "cast",
            "catalog",
            "catalogs",
            "chain",
            "char",
            "character",
            "charset",
            "check",
            "clean",
            "cluster",
            "clusters",
            "collate",
            "collation",
            "collect",
            "column",
            "columns",
            "comment",
            "commit",
            "committed",
            "compact",
            "complete",
            "config",
            "connection",
            "connection_id",
            "consistent",
            "constraint",
            "constraints",
            "convert",
            "copy",
            "count",
            "create",
            "creation",
            "cron",
            "cross",
            "cube",
            "current",
            "current_catalog",
            "current_date",
            "current_time",
            "current_timestamp",
            "current_user",
            "data",
            "database",
            "databases",
            "date",
            "date_add",
            "date_ceil",
            "date_diff",
            "date_floor",
            "date_sub",
            "dateadd",
            "datediff",
            "datetime",
            "datetimev2",
            "datev2",
            "datetimev1",
            "datev1",
            "day",
            "days_add",
            "days_sub",
            "decimal",
            "decimalv2",
            "decimalv3",
            "decommission",
            "default",
            "deferred",
            "delete",
            "demand",
            "desc",
            "describe",
            "diagnose",
            "disk",
            "distinct",
            "distinctpc",
            "distinctpcsa",
            "distributed",
            "distribution",
            "div",
            "do",
            "doris_internal_table_id",
            "double",
            "drop",
            "dropp",
            "dual",
            "duplicate",
            "dynamic",
            "else",
            "enable",
            "encryptkey",
            "encryptkeys",
            "end",
            "ends",
            "engine",
            "engines",
            "enter",
            "errors",
            "events",
            "every",
            "except",
            "exclude",
            "execute",
            "exists",
            "expired",
            "explain",
            "export",
            "extended",
            "external",
            "extract",
            "failed_login_attempts",
            "false",
            "fast",
            "feature",
            "fields",
            "file",
            "filter",
            "first",
            "float",
            "follower",
            "following",
            "for",
            "foreign",
            "force",
            "format",
            "free",
            "from",
            "frontend",
            "frontends",
            "full",
            "function",
            "functions",
            "generic",
            "global",
            "grant",
            "grants",
            "graph",
            "group",
            "grouping",
            "groups",
            "hash",
            "having",
            "hdfs",
            "help",
            "histogram",
            "hll",
            "hll_union",
            "hostname",
            "hour",
            "hub",
            "identified",
            "if",
            "ignore",
            "immediate",
            "in",
            "incremental",
            "index",
            "indexes",
            "infile",
            "inner",
            "insert",
            "install",
            "int",
            "integer",
            "intermediate",
            "intersect",
            "interval",
            "into",
            "inverted",
            "ipv4",
            "ipv6",
            "is",
            "is_not_null_pred",
            "is_null_pred",
            "isnull",
            "isolation",
            "job",
            "jobs",
            "join",
            "json",
            "jsonb",
            "key",
            "keys",
            "kill",
            "label",
            "largeint",
            "last",
            "lateral",
            "ldap",
            "ldap_admin_password",
            "left",
            "less",
            "level",
            "like",
            "limit",
            "lines",
            "link",
            "list",
            "load",
            "local",
            "localtime",
            "localtimestamp",
            "location",
            "lock",
            "logical",
            "low_priority",
            "manual",
            "map",
            "match",
            "match_all",
            "match_any",
            "match_phrase",
            "match_phrase_edge",
            "match_phrase_prefix",
            "match_regexp",
            "materialized",
            "max",
            "maxvalue",
            "memo",
            "merge",
            "migrate",
            "migrations",
            "min",
            "minus",
            "minute",
            "modify",
            "month",
            "mtmv",
            "name",
            "names",
            "natural",
            "negative",
            "never",
            "next",
            "ngram_bf",
            "no",
            "non_nullable",
            "not",
            "null",
            "nulls",
            "observer",
            "of",
            "offset",
            "on",
            "only",
            "open",
            "optimized",
            "or",
            "order",
            "outer",
            "outfile",
            "over",
            "overwrite",
            "parameter",
            "parsed",
            "partition",
            "partitions",
            "password",
            "password_expire",
            "password_history",
            "password_lock_time",
            "password_reuse",
            "path",
            "pause",
            "percent",
            "period",
            "permissive",
            "physical",
            "plan",
            "process",
            "plugin",
            "plugins",
            "policy",
            "preceding",
            "prepare",
            "primary",
            "proc",
            "procedure",
            "processlist",
            "profile",
            "properties",
            "property",
            "quantile_state",
            "quantile_union",
            "query",
            "quota",
            "random",
            "range",
            "read",
            "real",
            "rebalance",
            "recover",
            "recycle",
            "refresh",
            "references",
            "regexp",
            "release",
            "rename",
            "repair",
            "repeatable",
            "replace",
            "replace_if_not_null",
            "replica",
            "repositories",
            "repository",
            "resource",
            "resources",
            "restore",
            "restrictive",
            "resume",
            "returns",
            "revoke",
            "rewritten",
            "right",
            "rlike",
            "role",
            "roles",
            "rollback",
            "rollup",
            "routine",
            "row",
            "rows",
            "s3",
            "sample",
            "schedule",
            "scheduler",
            "schema",
            "schemas",
            "second",
            "select",
            "semi",
            "sequence",
            "serializable",
            "session",
            "set",
            "sets",
            "shape",
            "show",
            "signed",
            "skew",
            "smallint",
            "snapshot",
            "soname",
            "split",
            "sql_block_rule",
            "start",
            "starts",
            "stats",
            "status",
            "stop",
            "storage",
            "stream",
            "streaming",
            "string",
            "struct",
            "subdate",
            "sum",
            "superuser",
            "switch",
            "sync",
            "system",
            "table",
            "tables",
            "tablesample",
            "tablet",
            "tablets",
            "task",
            "tasks",
            "temporary",
            "terminated",
            "text",
            "than",
            "then",
            "time",
            "timestamp",
            "timestampadd",
            "timestampdiff",
            "tinyint",
            "to",
            "transaction",
            "trash",
            "tree",
            "triggers",
            "trim",
            "true",
            "truncate",
            "type",
            "type_cast",
            "types",
            "unbounded",
            "uncommitted",
            "uninstall",
            "union",
            "unique",
            "unlock",
            "unsigned",
            "update",
            "use",
            "user",
            "using",
            "value",
            "values",
            "varchar",
            "variables",
            "variant",
            "vault",
            "verbose",
            "version",
            "view",
            "warnings",
            "week",
            "when",
            "where",
            "whitelist",
            "with",
            "work",
            "workload",
            "write",
            "xor",
            "year",
        }