548 lines
14 KiB
Python
548 lines
14 KiB
Python
from __future__ import annotations
|
|
|
|
from sqlglot import exp
|
|
from sqlglot.dialects.dialect import (
|
|
approx_count_distinct_sql,
|
|
build_timestamp_trunc,
|
|
rename_func,
|
|
time_format,
|
|
unit_to_str,
|
|
)
|
|
from sqlglot.dialects.mysql import MySQL
|
|
|
|
|
|
class Doris(MySQL):
    """Doris SQL dialect, implemented as a set of overrides on top of ``MySQL``.

    Only the pieces that differ from the MySQL dialect are declared here:
    date/time format strings, a handful of extra parser functions, and the
    generator's type mapping, expression transforms and reserved keywords.
    """

    # Format strings are stored with their surrounding single quotes.
    DATE_FORMAT = "'yyyy-MM-dd'"
    DATEINT_FORMAT = "'yyyyMMdd'"
    TIME_FORMAT = "'yyyy-MM-dd HH:mm:ss'"

    class Parser(MySQL.Parser):
        """Parser overrides: extra function names mapped to sqlglot expressions."""

        # Function name -> builder; extends (and may override) the MySQL table.
        FUNCTIONS = {
            **MySQL.Parser.FUNCTIONS,
            "COLLECT_SET": exp.ArrayUniqueAgg.from_arg_list,
            "DATE_TRUNC": build_timestamp_trunc,
            "MONTHS_ADD": exp.AddMonths.from_arg_list,
            "REGEXP": exp.RegexpLike.from_arg_list,
            "TO_DATE": exp.TsOrDsToDate.from_arg_list,
        }

        # Work on a copy so the MySQL parser table is not mutated, then drop the
        # dedicated GROUP_CONCAT entry.
        # NOTE(review): presumably this makes GROUP_CONCAT parse as a regular
        # function call - confirm against the base parser.
        FUNCTION_PARSERS = MySQL.Parser.FUNCTION_PARSERS.copy()
        FUNCTION_PARSERS.pop("GROUP_CONCAT")

    class Generator(MySQL.Generator):
        """Generator overrides: type names, function renames and reserved words."""

        # NOTE(review): flag is consumed by the base generator; presumably it
        # means LAST_DAY takes no date-part argument here - confirm.
        LAST_DAY_SUPPORTS_DATE_PART = False

        # TEXT is emitted as STRING; both timestamp flavours are emitted as DATETIME.
        TYPE_MAPPING = {
            **MySQL.Generator.TYPE_MAPPING,
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIMESTAMP: "DATETIME",
            exp.DataType.Type.TIMESTAMPTZ: "DATETIME",
        }

        # Deliberately empty, replacing whatever the MySQL generator defines.
        CAST_MAPPING = {}
        TIMESTAMP_FUNC_TYPES = set()

        # Expression type -> SQL renderer; extends (and may override) the MySQL table.
        TRANSFORMS = {
            **MySQL.Generator.TRANSFORMS,
            exp.AddMonths: rename_func("MONTHS_ADD"),
            exp.ApproxDistinct: approx_count_distinct_sql,
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.ArrayAgg: rename_func("COLLECT_LIST"),
            exp.ArrayUniqueAgg: rename_func("COLLECT_SET"),
            exp.CurrentTimestamp: lambda self, _: self.func("NOW"),
            exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, unit_to_str(e)),
            # Falls back to a "," separator when the expression carries none.
            exp.GroupConcat: lambda self, e: self.func(
                "GROUP_CONCAT", e.this, e.args.get("separator") or exp.Literal.string(",")
            ),
            exp.JSONExtractScalar: lambda self, e: self.func("JSON_EXTRACT", e.this, e.expression),
            exp.Map: rename_func("ARRAY_MAP"),
            exp.RegexpLike: rename_func("REGEXP"),
            exp.RegexpSplit: rename_func("SPLIT_BY_STRING"),
            exp.Split: rename_func("SPLIT_BY_STRING"),
            exp.StringToArray: rename_func("SPLIT_BY_STRING"),
            exp.StrToUnix: lambda self, e: self.func("UNIX_TIMESTAMP", e.this, self.format_time(e)),
            exp.TimeStrToDate: rename_func("TO_DATE"),
            # Only `this` and `expression` are forwarded to DATE_ADD.
            exp.TsOrDsAdd: lambda self, e: self.func("DATE_ADD", e.this, e.expression),
            exp.TsOrDsToDate: lambda self, e: self.func("TO_DATE", e.this),
            exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"),
            exp.TimestampTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, unit_to_str(e)),
            exp.UnixToStr: lambda self, e: self.func(
                "FROM_UNIXTIME", e.this, time_format("doris")(self, e)
            ),
            exp.UnixToTime: rename_func("FROM_UNIXTIME"),
        }

        # Lower-case keyword names taken from the Doris lexer grammar:
        # https://github.com/apache/doris/blob/e4f41dbf1ec03f5937fdeba2ee1454a20254015b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4#L93
        RESERVED_KEYWORDS = {
            "account_lock",
            "account_unlock",
            "add",
            "adddate",
            "admin",
            "after",
            "agg_state",
            "aggregate",
            "alias",
            "all",
            "alter",
            "analyze",
            "analyzed",
            "and",
            "anti",
            "append",
            "array",
            "array_range",
            "as",
            "asc",
            "at",
            "authors",
            "auto",
            "auto_increment",
            "backend",
            "backends",
            "backup",
            "begin",
            "belong",
            "between",
            "bigint",
            "bin",
            "binary",
            "binlog",
            "bitand",
            "bitmap",
            "bitmap_union",
            "bitor",
            "bitxor",
            "blob",
            "boolean",
            "brief",
            "broker",
            "buckets",
            "build",
            "builtin",
            "bulk",
            "by",
            "cached",
            "call",
            "cancel",
            "case",
            "cast",
            "catalog",
            "catalogs",
            "chain",
            "char",
            "character",
            "charset",
            "check",
            "clean",
            "cluster",
            "clusters",
            "collate",
            "collation",
            "collect",
            "column",
            "columns",
            "comment",
            "commit",
            "committed",
            "compact",
            "complete",
            "config",
            "connection",
            "connection_id",
            "consistent",
            "constraint",
            "constraints",
            "convert",
            "copy",
            "count",
            "create",
            "creation",
            "cron",
            "cross",
            "cube",
            "current",
            "current_catalog",
            "current_date",
            "current_time",
            "current_timestamp",
            "current_user",
            "data",
            "database",
            "databases",
            "date",
            "date_add",
            "date_ceil",
            "date_diff",
            "date_floor",
            "date_sub",
            "dateadd",
            "datediff",
            "datetime",
            "datetimev2",
            "datev2",
            "datetimev1",
            "datev1",
            "day",
            "days_add",
            "days_sub",
            "decimal",
            "decimalv2",
            "decimalv3",
            "decommission",
            "default",
            "deferred",
            "delete",
            "demand",
            "desc",
            "describe",
            "diagnose",
            "disk",
            "distinct",
            "distinctpc",
            "distinctpcsa",
            "distributed",
            "distribution",
            "div",
            "do",
            "doris_internal_table_id",
            "double",
            "drop",
            "dropp",
            "dual",
            "duplicate",
            "dynamic",
            "else",
            "enable",
            "encryptkey",
            "encryptkeys",
            "end",
            "ends",
            "engine",
            "engines",
            "enter",
            "errors",
            "events",
            "every",
            "except",
            "exclude",
            "execute",
            "exists",
            "expired",
            "explain",
            "export",
            "extended",
            "external",
            "extract",
            "failed_login_attempts",
            "false",
            "fast",
            "feature",
            "fields",
            "file",
            "filter",
            "first",
            "float",
            "follower",
            "following",
            "for",
            "foreign",
            "force",
            "format",
            "free",
            "from",
            "frontend",
            "frontends",
            "full",
            "function",
            "functions",
            "generic",
            "global",
            "grant",
            "grants",
            "graph",
            "group",
            "grouping",
            "groups",
            "hash",
            "having",
            "hdfs",
            "help",
            "histogram",
            "hll",
            "hll_union",
            "hostname",
            "hour",
            "hub",
            "identified",
            "if",
            "ignore",
            "immediate",
            "in",
            "incremental",
            "index",
            "indexes",
            "infile",
            "inner",
            "insert",
            "install",
            "int",
            "integer",
            "intermediate",
            "intersect",
            "interval",
            "into",
            "inverted",
            "ipv4",
            "ipv6",
            "is",
            "is_not_null_pred",
            "is_null_pred",
            "isnull",
            "isolation",
            "job",
            "jobs",
            "join",
            "json",
            "jsonb",
            "key",
            "keys",
            "kill",
            "label",
            "largeint",
            "last",
            "lateral",
            "ldap",
            "ldap_admin_password",
            "left",
            "less",
            "level",
            "like",
            "limit",
            "lines",
            "link",
            "list",
            "load",
            "local",
            "localtime",
            "localtimestamp",
            "location",
            "lock",
            "logical",
            "low_priority",
            "manual",
            "map",
            "match",
            "match_all",
            "match_any",
            "match_phrase",
            "match_phrase_edge",
            "match_phrase_prefix",
            "match_regexp",
            "materialized",
            "max",
            "maxvalue",
            "memo",
            "merge",
            "migrate",
            "migrations",
            "min",
            "minus",
            "minute",
            "modify",
            "month",
            "mtmv",
            "name",
            "names",
            "natural",
            "negative",
            "never",
            "next",
            "ngram_bf",
            "no",
            "non_nullable",
            "not",
            "null",
            "nulls",
            "observer",
            "of",
            "offset",
            "on",
            "only",
            "open",
            "optimized",
            "or",
            "order",
            "outer",
            "outfile",
            "over",
            "overwrite",
            "parameter",
            "parsed",
            "partition",
            "partitions",
            "password",
            "password_expire",
            "password_history",
            "password_lock_time",
            "password_reuse",
            "path",
            "pause",
            "percent",
            "period",
            "permissive",
            "physical",
            "plan",
            "process",
            "plugin",
            "plugins",
            "policy",
            "preceding",
            "prepare",
            "primary",
            "proc",
            "procedure",
            "processlist",
            "profile",
            "properties",
            "property",
            "quantile_state",
            "quantile_union",
            "query",
            "quota",
            "random",
            "range",
            "read",
            "real",
            "rebalance",
            "recover",
            "recycle",
            "refresh",
            "references",
            "regexp",
            "release",
            "rename",
            "repair",
            "repeatable",
            "replace",
            "replace_if_not_null",
            "replica",
            "repositories",
            "repository",
            "resource",
            "resources",
            "restore",
            "restrictive",
            "resume",
            "returns",
            "revoke",
            "rewritten",
            "right",
            "rlike",
            "role",
            "roles",
            "rollback",
            "rollup",
            "routine",
            "row",
            "rows",
            "s3",
            "sample",
            "schedule",
            "scheduler",
            "schema",
            "schemas",
            "second",
            "select",
            "semi",
            "sequence",
            "serializable",
            "session",
            "set",
            "sets",
            "shape",
            "show",
            "signed",
            "skew",
            "smallint",
            "snapshot",
            "soname",
            "split",
            "sql_block_rule",
            "start",
            "starts",
            "stats",
            "status",
            "stop",
            "storage",
            "stream",
            "streaming",
            "string",
            "struct",
            "subdate",
            "sum",
            "superuser",
            "switch",
            "sync",
            "system",
            "table",
            "tables",
            "tablesample",
            "tablet",
            "tablets",
            "task",
            "tasks",
            "temporary",
            "terminated",
            "text",
            "than",
            "then",
            "time",
            "timestamp",
            "timestampadd",
            "timestampdiff",
            "tinyint",
            "to",
            "transaction",
            "trash",
            "tree",
            "triggers",
            "trim",
            "true",
            "truncate",
            "type",
            "type_cast",
            "types",
            "unbounded",
            "uncommitted",
            "uninstall",
            "union",
            "unique",
            "unlock",
            "unsigned",
            "update",
            "use",
            "user",
            "using",
            "value",
            "values",
            "varchar",
            "variables",
            "variant",
            "vault",
            "verbose",
            "version",
            "view",
            "warnings",
            "week",
            "when",
            "where",
            "whitelist",
            "with",
            "work",
            "workload",
            "write",
            "xor",
            "year",
        }
|