
Merging upstream version 26.11.1.

Signed-off-by: Daniel Baumann <daniel@debian.org>
Daniel Baumann, 2025-03-23 10:42:04 +01:00
parent 913e13699d
commit 0b9b4fe8cb
Signed by: daniel (GPG key ID: FBB4F0E80A80222F)
52 changed files with 14319 additions and 14244 deletions

@@ -223,7 +223,7 @@ class TypeAnnotator(metaclass=_TypeAnnotator):
                         values = [expression.this.this]
                 elif isinstance(expression, exp.Unnest):
                     values = [expression]
-                else:
+                elif not isinstance(expression, exp.TableFromRows):
                     values = expression.expressions[0].expressions
 
                 if not values:
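
Note (not part of the diff): a minimal sketch of how this annotation path is typically exercised through sqlglot's public API; the query, dialect, and expected type are illustrative assumptions, and the inferred type may differ by sqlglot version or come back unknown without a schema.

import sqlglot
from sqlglot.optimizer.annotate_types import annotate_types

# The hunk above changes how projected values are collected from UDTF-like
# sources (Lateral / Unnest / VALUES) before type inference runs over them.
expression = sqlglot.parse_one("SELECT col FROM UNNEST([1, 2, 3]) AS col", read="bigquery")
annotated = annotate_types(expression)

# Every annotated node exposes a `type` attribute; depending on the version
# and the available schema information this may print INT or UNKNOWN.
print(annotated.selects[0].type)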

@@ -55,14 +55,13 @@ def qualify_columns(
     infer_schema = schema.empty if infer_schema is None else infer_schema
     dialect = Dialect.get_or_raise(schema.dialect)
     pseudocolumns = dialect.PSEUDOCOLUMNS
-    snowflake_or_oracle = dialect in ("oracle", "snowflake")
+    bigquery = dialect == "bigquery"
 
     for scope in traverse_scope(expression):
         scope_expression = scope.expression
         is_select = isinstance(scope_expression, exp.Select)
 
-        if is_select and snowflake_or_oracle and scope_expression.args.get("connect"):
+        if is_select and scope_expression.args.get("connect"):
             # In Snowflake / Oracle queries that have a CONNECT BY clause, one can use the LEVEL
             # pseudocolumn, which doesn't belong to a table, so we change it into an identifier
             scope_expression.transform(
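
Note (not part of the diff): an illustrative sketch of the CONNECT BY / LEVEL case handled above, using sqlglot's public qualify() entry point and a made-up employees schema; with this change the LEVEL pseudocolumn is turned into a plain identifier for any dialect that parses CONNECT BY, not only Snowflake and Oracle.

import sqlglot
from sqlglot.optimizer.qualify import qualify

# Hypothetical schema, used only for this example.
schema = {"employees": {"employee_id": "int", "manager_id": "int"}}

sql = """
SELECT employee_id, LEVEL
FROM employees
START WITH manager_id IS NULL
CONNECT BY PRIOR employee_id = manager_id
"""

qualified = qualify(sqlglot.parse_one(sql, read="oracle"), schema=schema, dialect="oracle")

# employee_id should be qualified with its table, while LEVEL, which belongs
# to no table, is left as a bare identifier.
print(qualified.sql("oracle"))
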
@@ -81,7 +80,7 @@ def qualify_columns(
                 scope,
                 resolver,
                 dialect,
-                expand_only_groupby=dialect.EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY,
+                expand_only_groupby=bigquery,
             )
 
         _convert_columns_to_dots(scope, resolver)
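
Note (not part of the diff): a hedged sketch of what expand_only_groupby controls, assuming a hypothetical table t(a, b); for BigQuery, select-alias references are expanded early only inside GROUP BY, so the alias c below is replaced by its definition there. The exact output depends on the sqlglot version.

import sqlglot
from sqlglot.optimizer.qualify import qualify

sql = "SELECT a + 1 AS c, SUM(b) AS total FROM t GROUP BY c"

qualified = qualify(
    sqlglot.parse_one(sql, read="bigquery"),
    schema={"t": {"a": "int", "b": "int"}},  # hypothetical schema
    dialect="bigquery",
)

# The GROUP BY alias reference is expected to be expanded to the underlying
# expression (t.a + 1) during qualification.
print(qualified.sql("bigquery"))
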
@@ -107,7 +106,7 @@ def qualify_columns(
         # https://www.postgresql.org/docs/current/sql-select.html#SQL-DISTINCT
         _expand_order_by_and_distinct_on(scope, resolver)
 
-        if dialect == "bigquery":
+        if bigquery:
             annotator.annotate_scope(scope)
 
     return expression
@@ -277,11 +276,13 @@ def _expand_alias_refs(
         return
 
     alias_to_expression: t.Dict[str, t.Tuple[exp.Expression, int]] = {}
+    projections = {s.alias_or_name for s in expression.selects}
 
     def replace_columns(
         node: t.Optional[exp.Expression], resolve_table: bool = False, literal_index: bool = False
     ) -> None:
         is_group_by = isinstance(node, exp.Group)
+        is_having = isinstance(node, exp.Having)
 
         if not node or (expand_only_groupby and not is_group_by):
             return
@@ -296,23 +297,29 @@ def _expand_alias_refs(
             if expand_only_groupby and is_group_by and column.parent is not node:
                 continue
 
+            skip_replace = False
             table = resolver.get_table(column.name) if resolve_table and not column.table else None
             alias_expr, i = alias_to_expression.get(column.name, (None, 1))
-            double_agg = (
-                (
-                    alias_expr.find(exp.AggFunc)
-                    and (
-                        column.find_ancestor(exp.AggFunc)
-                        and not isinstance(column.find_ancestor(exp.Window, exp.Select), exp.Window)
-                    )
-                )
-                if alias_expr
-                else False
-            )
 
-            if table and (not alias_expr or double_agg):
+            if alias_expr:
+                skip_replace = bool(
+                    alias_expr.find(exp.AggFunc)
+                    and column.find_ancestor(exp.AggFunc)
+                    and not isinstance(column.find_ancestor(exp.Window, exp.Select), exp.Window)
+                )
+
+                # BigQuery's having clause gets confused if an alias matches a source.
+                # SELECT x.a, max(x.b) as x FROM x GROUP BY 1 HAVING x > 1;
+                # If HAVING x is expanded to max(x.b), bigquery treats x as the new projection x instead of the table
+                if is_having and dialect == "bigquery":
+                    skip_replace = skip_replace or any(
+                        node.parts[0].name in projections
+                        for node in alias_expr.find_all(exp.Column)
+                    )
+
+            if table and (not alias_expr or skip_replace):
                 column.set("table", table)
-            elif not column.table and alias_expr and not double_agg:
+            elif not column.table and alias_expr and not skip_replace:
                 if isinstance(alias_expr, exp.Literal) and (literal_index or resolve_table):
                     if literal_index:
                         column.replace(exp.Literal.number(i))
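
Note (not part of the diff): a hedged sketch of the HAVING case described in the comment above, reusing the query from that comment with a hypothetical schema for x; after this change the alias reference in HAVING is kept as-is rather than being expanded to MAX(x.b), which BigQuery would misread as referring to the projection instead of the table.

import sqlglot
from sqlglot.optimizer.qualify import qualify

# Query taken from the comment in the hunk above: the select alias `x`
# collides with the source table `x`.
sql = "SELECT x.a, MAX(x.b) AS x FROM x GROUP BY 1 HAVING x > 1"

qualified = qualify(
    sqlglot.parse_one(sql, read="bigquery"),
    schema={"x": {"a": "int", "b": "int"}},  # hypothetical schema
    dialect="bigquery",
)

# With skip_replace set for this case, `HAVING x > 1` should survive
# qualification instead of being rewritten to `HAVING MAX(x.b) > 1`.
print(qualified.sql("bigquery"))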