
Adding upstream version 0.12.0.

Signed-off-by: Daniel Baumann <daniel@debian.org>
Daniel Baumann 2025-02-24 10:57:24 +01:00
parent d887bee5ca
commit 148efc9122
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
69 changed files with 12923 additions and 0 deletions

166  stubs/pyarrow/__init__.pyi  Normal file

@@ -0,0 +1,166 @@
from __future__ import annotations
from typing import Any, Iterable, Iterator, Literal, Mapping, Sequence, Type, TypeVar
import pandas as pd
from .compute import CastOptions
class DataType: ...
class Date32Type(DataType): ...
class Date64Type(DataType): ...
class TimestampType(DataType): ...
def string() -> DataType: ...
def null() -> DataType: ...
def bool_() -> DataType: ...
def int8() -> DataType: ...
def int16() -> DataType: ...
def int32() -> DataType: ...
def int64() -> DataType: ...
def uint8() -> DataType: ...
def uint16() -> DataType: ...
def uint32() -> DataType: ...
def uint64() -> DataType: ...
def float16() -> DataType: ...
def float32() -> DataType: ...
def float64() -> DataType: ...
def date32() -> DataType: ...
def date64() -> DataType: ...
def binary(length: int = -1) -> DataType: ...
def large_binary() -> DataType: ...
def large_string() -> DataType: ...
def month_day_nano_interval() -> DataType: ...
def time32(unit: Literal["s", "ms", "us", "ns"]) -> DataType: ...
def time64(unit: Literal["s", "ms", "us", "ns"]) -> DataType: ...
def timestamp(
unit: Literal["s", "ms", "us", "ns"], tz: str | None = None
) -> DataType: ...
def duration(unit: Literal["s", "ms", "us", "ns"]) -> DataType: ...
class MemoryPool: ...
class Schema: ...
class Field: ...
class NativeFile: ...
class MonthDayNano: ...
class Scalar:
def as_py(self) -> Any: ...
@property
def type(self) -> DataType: ...
A = TypeVar("A", bound="_PandasConvertible")
class _PandasConvertible:
@property
def type(self) -> DataType: ... # noqa: A003
def cast(
self: A,
target_type: DataType | None = None,
safe: bool = True,
options: CastOptions | None = None,
) -> A: ...
def __getitem__(self, index: int) -> Scalar: ...
def __iter__(self) -> Any: ...
def to_pylist(self) -> list[Any]: ...
def fill_null(self: A, fill_value: Any) -> A: ...
def drop_null(self: A) -> A: ...
class Array(_PandasConvertible): ...
class ChunkedArray(_PandasConvertible): ...
class StructArray(Array):
def flatten(self, memory_pool: MemoryPool | None = None) -> list[Array]: ...
T = TypeVar("T", bound="_Tabular")
class _Tabular:
@classmethod
def from_arrays(
cls: Type[T],
arrays: list[_PandasConvertible],
names: list[str] | None = None,
schema: Schema | None = None,
metadata: Mapping | None = None,
) -> T: ...
@classmethod
def from_pydict(
cls: Type[T],
mapping: Mapping,
schema: Schema | None = None,
metadata: Mapping | None = None,
) -> T: ...
def __getitem__(self, index: int) -> _PandasConvertible: ...
def __len__(self) -> int: ...
@property
def column_names(self) -> list[str]: ...
@property
def columns(self) -> list[_PandasConvertible]: ...
@property
def num_rows(self) -> int: ...
@property
def num_columns(self) -> int: ...
@property
def schema(self) -> Schema: ...
def append_column(
self: T, field_: str | Field, column: Array | ChunkedArray
) -> T: ...
def column(self, i: int | str) -> _PandasConvertible: ...
def equals(self: T, other: T, check_metadata: bool = False) -> bool: ...
def itercolumns(self) -> Iterator[_PandasConvertible]: ...
def rename_columns(self: T, names: list[str]) -> T: ...
def select(self: T, columns: Sequence[str | int]) -> T: ...
def set_column(
self: T, i: int, field_: str | Field, column: Array | ChunkedArray
) -> T: ...
def slice( # noqa: A003
self: T,
offset: int = 0,
length: int | None = None,
) -> T: ...
def sort_by(
self: T,
sorting: str | list[tuple[str, Literal["ascending", "descending"]]],
**kwargs: Any,
) -> T: ...
def to_pylist(self) -> list[dict[str, Any]]: ...
class RecordBatch(_Tabular): ...
class Table(_Tabular):
@classmethod
def from_batches(
cls,
batches: Iterable[RecordBatch],
schema: Schema | None = None,
) -> "Table": ...
def to_batches(self) -> list[RecordBatch]: ...
def scalar(value: Any, type: DataType) -> Scalar: ... # noqa: A002
def array(
obj: Iterable,
type: DataType | None = None, # noqa: A002
mask: Array | None = None,
size: int | None = None,
from_pandas: bool | None = None,
safe: bool = True,
memory_pool: MemoryPool | None = None,
) -> Array | ChunkedArray: ...
def concat_arrays(
arrays: Iterable[Array], memory_pool: MemoryPool | None = None
) -> Array: ...
def nulls(
size: int,
type: DataType | None = None, # noqa: A002
memory_pool: MemoryPool | None = None,
) -> Array: ...
def table(
data: pd.DataFrame
| Mapping[str, _PandasConvertible | list]
| list[_PandasConvertible],
names: list[str] | None = None,
schema: Schema | None = None,
metadata: Mapping | None = None,
nthreads: int | None = None,
) -> Table: ...
def set_timezone_db_path(path: str) -> None: ...
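
A minimal usage sketch of the surface this stub declares (illustrative only; output comments assume pyarrow's default null placement when sorting):

import pyarrow as pa

# Factory functions and pa.array/pa.table as typed above.
arr = pa.array([1, 2, None], type=pa.int64())
tbl = pa.table({"x": arr, "y": pa.array(["a", "b", "c"])})

# _Tabular methods covered by the stub: select, sort_by, to_pylist.
subset = tbl.select(["x"]).sort_by([("x", "descending")])
print(subset.to_pylist())  # [{'x': 2}, {'x': 1}, {'x': None}]

# _PandasConvertible methods: fill_null and cast return the same kind of object.
filled = tbl.column("x").fill_null(0).cast(pa.int32())
print(filled.to_pylist())  # [1, 2, 0]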

64  stubs/pyarrow/compute.pyi  Normal file

@@ -0,0 +1,64 @@
from __future__ import annotations
from datetime import datetime
from typing import Any, Callable, Literal
from . import DataType, MemoryPool, Scalar, _PandasConvertible
class Expression: ...
class ScalarAggregateOptions: ...
class CastOptions:
def __init__(
self,
target_type: DataType | None = None,
allow_int_overflow: bool | None = None,
allow_time_truncate: bool | None = None,
allow_time_overflow: bool | None = None,
allow_decimal_truncate: bool | None = None,
allow_float_truncate: bool | None = None,
allow_invalid_utf8: bool | None = None,
) -> None: ...
def max( # noqa: A001
array: _PandasConvertible,
/,
*,
skip_nulls: bool = True,
min_count: int = 1,
options: ScalarAggregateOptions | None = None,
memory_pool: MemoryPool | None = None,
) -> Scalar: ...
def min( # noqa: A001
array: _PandasConvertible,
/,
*,
skip_nulls: bool = True,
min_count: int = 1,
options: ScalarAggregateOptions | None = None,
memory_pool: MemoryPool | None = None,
) -> Scalar: ...
def utf8_length(
strings: _PandasConvertible, /, *, memory_pool: MemoryPool | None = None
) -> _PandasConvertible: ...
def register_scalar_function(
func: Callable,
function_name: str,
function_doc: dict[Literal["summary", "description"], str],
in_types: dict[str, DataType],
out_type: DataType,
func_registry: Any | None = None,
) -> None: ...
def call_function(
function_name: str, target: list[_PandasConvertible]
) -> _PandasConvertible: ...
def assume_timezone(
timestamps: _PandasConvertible | Scalar | datetime,
/,
timezone: str,
*,
ambiguous: Literal["raise", "earliest", "latest"] = "raise",
nonexistent: Literal["raise", "earliest", "latest"] = "raise",
options: Any | None = None,
memory_pool: MemoryPool | None = None,
) -> _PandasConvertible: ...
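
A short sketch exercising the compute functions typed above (illustrative only):

import pyarrow as pa
import pyarrow.compute as pc

arr = pa.array([3, 1, None, 7])

# Aggregations return a Scalar; as_py() unwraps it (see Scalar in __init__.pyi).
print(pc.max(arr).as_py())                   # 7
print(pc.min(arr, skip_nulls=True).as_py())  # 1

# utf8_length maps a string array to an integer array.
print(pc.utf8_length(pa.array(["ab", "cdef"])).to_pylist())  # [2, 4]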

1  stubs/pyarrow/dataset.pyi  Normal file

@@ -0,0 +1 @@
class Partitioning: ...

1  stubs/pyarrow/fs.pyi  Normal file

@@ -0,0 +1 @@
class FileSystem: ...

32  stubs/pyarrow/lib.pyi  Normal file

@@ -0,0 +1,32 @@
from . import Date32Type, Date64Type, Scalar, TimestampType
class ArrowException(Exception): ...
class ArrowInvalid(ValueError, ArrowException): ...
class ArrowMemoryError(MemoryError, ArrowException): ...
class ArrowKeyError(KeyError, ArrowException): ...
class ArrowTypeError(TypeError, ArrowException): ...
class ArrowNotImplementedError(NotImplementedError, ArrowException): ...
class ArrowCapacityError(ArrowException): ...
class ArrowIndexError(IndexError, ArrowException): ...
class ArrowSerializationError(ArrowException): ...
class ArrowCancelled(ArrowException): ...
ArrowIOError = IOError
class Date32Scalar(Scalar):
@property
def type(self) -> Date32Type: ...
@property
def value(self) -> int: ...
class Date64Scalar(Scalar):
@property
def type(self) -> Date64Type: ...
@property
def value(self) -> int: ...
class TimestampScalar(Scalar):
@property
def type(self) -> TimestampType: ...
@property
def value(self) -> int: ...
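
These mirror pyarrow's real exception hierarchy, so code typed against the stub can catch either the Arrow class or its builtin base. A sketch (illustrative only):

import pyarrow as pa

try:
    # Casting a non-numeric string to int64 raises ArrowInvalid at runtime.
    pa.array(["not a number"]).cast(pa.int64())
except pa.lib.ArrowInvalid as exc:  # also caught by `except ValueError`
    print(type(exc).__name__, exc)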

60  stubs/pyarrow/parquet.pyi  Normal file

@@ -0,0 +1,60 @@
from __future__ import annotations
from typing import Any, BinaryIO, Literal
from . import NativeFile, Schema, Table
from .compute import Expression
from .dataset import Partitioning
from .fs import FileSystem
class FileMetaData: ...
def read_table(
source: str | NativeFile | BinaryIO,
*,
columns: list | None = None,
use_threads: bool = True,
metadata: FileMetaData | None = None,
schema: Schema | None = None,
use_pandas_metadata: bool = False,
read_dictionary: list | None = None,
memory_map: bool = False,
buffer_size: int = 0,
partitioning: Partitioning | str | list[str] = "hive",
filesystem: FileSystem | None = None,
filters: Expression | list[tuple] | list[list[tuple]] | None = None,
use_legacy_dataset: bool = False,
ignore_prefixes: list | None = None,
pre_buffer: bool = True,
coerce_int96_timestamp_unit: str | None = None,
decryption_properties: Any | None = None,
thrift_string_size_limit: int | None = None,
thrift_container_size_limit: int | None = None,
) -> Table: ...
def write_table(
table: Table,
where: str | NativeFile,
row_group_size: int | None = None,
version: Literal["1.0", "2.4", "2.6"] = "2.6",
use_dictionary: bool | list = True,
compression: Literal["none", "snappy", "gzip", "brotli", "lz4", "zstd"]
| dict[str, Literal["none", "snappy", "gzip", "brotli", "lz4", "zstd"]] = "snappy",
write_statistics: bool | list = True,
use_deprecated_int96_timestamps: bool | None = None,
coerce_timestamps: str | None = None,
allow_truncated_timestamps: bool = False,
data_page_size: int | None = None,
flavor: Literal["spark"] | None = None,
filesystem: FileSystem | None = None,
compression_level: int | dict | None = None,
use_byte_stream_split: bool | list = False,
column_encoding: str | dict | None = None,
data_page_version: Literal["1.0", "2.0"] = "1.0",
use_compliant_nested_type: bool = True,
encryption_properties: Any | None = None,
write_batch_size: int | None = None,
dictionary_pagesize_limit: int | None = None,
store_schema: bool = True,
write_page_index: bool = False,
**kwargs: Any,
) -> None: ...
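
A write/read round trip against these signatures (a sketch; the file path is a placeholder):

import pyarrow as pa
import pyarrow.parquet as pq

tbl = pa.table({"id": [1, 2, 3], "name": ["a", "b", "c"]})

# compression takes one of the literals in the stub; columns narrows the read.
pq.write_table(tbl, "/tmp/example.parquet", compression="zstd")  # placeholder path
back = pq.read_table("/tmp/example.parquet", columns=["id"])
assert back.num_rows == 3 and back.column_names == ["id"]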

27  stubs/pyarrow/types.pyi  Normal file

@@ -0,0 +1,27 @@
from __future__ import annotations
from . import DataType, Date32Type, Date64Type, TimestampType
def is_null(t: DataType) -> bool: ...
def is_struct(t: DataType) -> bool: ...
def is_boolean(t: DataType) -> bool: ...
def is_integer(t: DataType) -> bool: ...
def is_floating(t: DataType) -> bool: ...
def is_decimal(t: DataType) -> bool: ...
def is_temporal(t: DataType) -> bool: ...
def is_date(t: DataType) -> bool: ...
def is_date32(t: DataType) -> bool:
    return isinstance(t, Date32Type)
def is_date64(t: DataType) -> bool:
    return isinstance(t, Date64Type)
def is_time(t: DataType) -> bool: ...
def is_timestamp(t: DataType) -> bool:
    return isinstance(t, TimestampType)
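
The predicates are plain boolean checks on a DataType, e.g. (illustrative sketch):

import pyarrow as pa
import pyarrow.types as types

t = pa.timestamp("us", tz="UTC")
print(types.is_timestamp(t))  # True
print(types.is_temporal(t))   # True
print(types.is_date32(t))     # False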