
Adding upstream version 0.12.0.

Signed-off-by: Daniel Baumann <daniel@debian.org>
Daniel Baumann 2025-02-24 10:57:24 +01:00
parent d887bee5ca
commit 148efc9122
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
69 changed files with 12923 additions and 0 deletions

166  stubs/pyarrow/__init__.pyi  Normal file

@@ -0,0 +1,166 @@
from __future__ import annotations
from typing import Any, Iterable, Iterator, Literal, Mapping, Sequence, Type, TypeVar
import pandas as pd
from .compute import CastOptions
class DataType: ...
class Date32Type(DataType): ...
class Date64Type(DataType): ...
class TimestampType(DataType): ...
def string() -> DataType: ...
def null() -> DataType: ...
def bool_() -> DataType: ...
def int8() -> DataType: ...
def int16() -> DataType: ...
def int32() -> DataType: ...
def int64() -> DataType: ...
def uint8() -> DataType: ...
def uint16() -> DataType: ...
def uint32() -> DataType: ...
def uint64() -> DataType: ...
def float16() -> DataType: ...
def float32() -> DataType: ...
def float64() -> DataType: ...
def date32() -> DataType: ...
def date64() -> DataType: ...
def binary(length: int = -1) -> DataType: ...
def large_binary() -> DataType: ...
def large_string() -> DataType: ...
def month_day_nano_interval() -> DataType: ...
def time32(unit: Literal["s", "ms", "us", "ns"]) -> DataType: ...
def time64(unit: Literal["s", "ms", "us", "ns"]) -> DataType: ...
def timestamp(
unit: Literal["s", "ms", "us", "ns"], tz: str | None = None
) -> DataType: ...
def duration(unit: Literal["s", "ms", "us", "ns"]) -> DataType: ...
class MemoryPool: ...
class Schema: ...
class Field: ...
class NativeFile: ...
class MonthDayNano: ...
class Scalar:
def as_py(self) -> Any: ...
@property
def type(self) -> DataType: ...
A = TypeVar("A", bound="_PandasConvertible")
class _PandasConvertible:
@property
def type(self) -> DataType: ... # noqa: A003
def cast(
self: A,
target_type: DataType | None = None,
safe: bool = True,
options: CastOptions | None = None,
) -> A: ...
def __getitem__(self, index: int) -> Scalar: ...
def __iter__(self) -> Any: ...
def to_pylist(self) -> list[Any]: ...
def fill_null(self: A, fill_value: Any) -> A: ...
def drop_null(self: A) -> A: ...
class Array(_PandasConvertible): ...
class ChunkedArray(_PandasConvertible): ...
class StructArray(Array):
def flatten(self, memory_pool: MemoryPool | None = None) -> list[Array]: ...
T = TypeVar("T", bound="_Tabular")
class _Tabular:
@classmethod
def from_arrays(
cls: Type[T],
arrays: list[_PandasConvertible],
names: list[str] | None = None,
schema: Schema | None = None,
metadata: Mapping | None = None,
) -> T: ...
@classmethod
def from_pydict(
cls: Type[T],
mapping: Mapping,
schema: Schema | None = None,
metadata: Mapping | None = None,
) -> T: ...
def __getitem__(self, index: int) -> _PandasConvertible: ...
def __len__(self) -> int: ...
@property
def column_names(self) -> list[str]: ...
@property
def columns(self) -> list[_PandasConvertible]: ...
@property
def num_rows(self) -> int: ...
@property
def num_columns(self) -> int: ...
@property
def schema(self) -> Schema: ...
def append_column(
self: T, field_: str | Field, column: Array | ChunkedArray
) -> T: ...
def column(self, i: int | str) -> _PandasConvertible: ...
def equals(self: T, other: T, check_metadata: bool = False) -> bool: ...
def itercolumns(self) -> Iterator[_PandasConvertible]: ...
def rename_columns(self: T, names: list[str]) -> T: ...
def select(self: T, columns: Sequence[str | int]) -> T: ...
def set_column(
self: T, i: int, field_: str | Field, column: Array | ChunkedArray
) -> T: ...
def slice( # noqa: A003
self: T,
offset: int = 0,
length: int | None = None,
) -> T: ...
def sort_by(
self: T,
sorting: str | list[tuple[str, Literal["ascending", "descending"]]],
**kwargs: Any,
) -> T: ...
def to_pylist(self) -> list[dict[str, Any]]: ...
class RecordBatch(_Tabular): ...
class Table(_Tabular):
@classmethod
def from_batches(
cls,
batches: Iterable[RecordBatch],
schema: Schema | None = None,
) -> "Table": ...
def to_batches(self) -> list[RecordBatch]: ...
def scalar(value: Any, type: DataType) -> Scalar: ... # noqa: A002
def array(
obj: Iterable,
type: DataType | None = None, # noqa: A002
mask: Array | None = None,
size: int | None = None,
from_pandas: bool | None = None,
safe: bool = True,
memory_pool: MemoryPool | None = None,
) -> Array | ChunkedArray: ...
def concat_arrays(
arrays: Iterable[Array], memory_pool: MemoryPool | None = None
) -> Array: ...
def nulls(
size: int,
type: DataType | None = None, # noqa: A002
memory_pool: MemoryPool | None = None,
) -> Array: ...
def table(
data: pd.DataFrame
| Mapping[str, _PandasConvertible | list]
| list[_PandasConvertible],
names: list[str] | None = None,
schema: Schema | None = None,
metadata: Mapping | None = None,
nthreads: int | None = None,
) -> Table: ...
def set_timezone_db_path(path: str) -> None: ...
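
A minimal usage sketch of the surface this stub declares (illustrative only; output comments assume pyarrow's default null placement when sorting):

import pyarrow as pa

# Factory functions and pa.array/pa.table as typed above.
arr = pa.array([1, 2, None], type=pa.int64())
tbl = pa.table({"x": arr, "y": pa.array(["a", "b", "c"])})

# _Tabular methods covered by the stub: select, sort_by, to_pylist.
subset = tbl.select(["x"]).sort_by([("x", "descending")])
print(subset.to_pylist())  # [{'x': 2}, {'x': 1}, {'x': None}]

# _PandasConvertible methods: fill_null and cast return the same kind of object.
filled = tbl.column("x").fill_null(0).cast(pa.int32())
print(filled.to_pylist())  # [1, 2, 0]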

64  stubs/pyarrow/compute.pyi  Normal file

@@ -0,0 +1,64 @@
from __future__ import annotations
from datetime import datetime
from typing import Any, Callable, Literal
from . import DataType, MemoryPool, Scalar, _PandasConvertible
class Expression: ...
class ScalarAggregateOptions: ...
class CastOptions:
def __init__(
self,
target_type: DataType | None = None,
allow_int_overflow: bool | None = None,
allow_time_truncate: bool | None = None,
allow_time_overflow: bool | None = None,
allow_decimal_truncate: bool | None = None,
allow_float_truncate: bool | None = None,
allow_invalid_utf8: bool | None = None,
) -> None: ...
def max( # noqa: A001
array: _PandasConvertible,
/,
*,
skip_nulls: bool = True,
min_count: int = 1,
options: ScalarAggregateOptions | None = None,
memory_pool: MemoryPool | None = None,
) -> Scalar: ...
def min( # noqa: A001
array: _PandasConvertible,
/,
*,
skip_nulls: bool = True,
min_count: int = 1,
options: ScalarAggregateOptions | None = None,
memory_pool: MemoryPool | None = None,
) -> Scalar: ...
def utf8_length(
strings: _PandasConvertible, /, *, memory_pool: MemoryPool | None = None
) -> _PandasConvertible: ...
def register_scalar_function(
func: Callable,
function_name: str,
function_doc: dict[Literal["summary", "description"], str],
in_types: dict[str, DataType],
out_type: DataType,
func_registry: Any | None = None,
) -> None: ...
def call_function(
function_name: str, target: list[_PandasConvertible]
) -> _PandasConvertible: ...
def assume_timezone(
timestamps: _PandasConvertible | Scalar | datetime,
/,
timezone: str,
*,
ambiguous: Literal["raise", "earliest", "latest"] = "raise",
nonexistent: Literal["raise", "earliest", "latest"] = "raise",
options: Any | None = None,
memory_pool: MemoryPool | None = None,
) -> _PandasConvertible: ...
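
A short sketch exercising the compute functions typed above (illustrative only):

import pyarrow as pa
import pyarrow.compute as pc

arr = pa.array([3, 1, None, 7])

# Aggregations return a Scalar; as_py() unwraps it (see Scalar in __init__.pyi).
print(pc.max(arr).as_py())                   # 7
print(pc.min(arr, skip_nulls=True).as_py())  # 1

# utf8_length maps a string array to an integer array.
print(pc.utf8_length(pa.array(["ab", "cdef"])).to_pylist())  # [2, 4]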

1  stubs/pyarrow/dataset.pyi  Normal file

@@ -0,0 +1 @@
class Partitioning: ...

1  stubs/pyarrow/fs.pyi  Normal file

@@ -0,0 +1 @@
class FileSystem: ...

32  stubs/pyarrow/lib.pyi  Normal file

@@ -0,0 +1,32 @@
from . import Date32Type, Date64Type, Scalar, TimestampType
class ArrowException(Exception): ...
class ArrowInvalid(ValueError, ArrowException): ...
class ArrowMemoryError(MemoryError, ArrowException): ...
class ArrowKeyError(KeyError, ArrowException): ...
class ArrowTypeError(TypeError, ArrowException): ...
class ArrowNotImplementedError(NotImplementedError, ArrowException): ...
class ArrowCapacityError(ArrowException): ...
class ArrowIndexError(IndexError, ArrowException): ...
class ArrowSerializationError(ArrowException): ...
class ArrowCancelled(ArrowException): ...
ArrowIOError = IOError
class Date32Scalar(Scalar):
@property
def type(self) -> Date32Type: ...
@property
def value(self) -> int: ...
class Date64Scalar(Scalar):
@property
def type(self) -> Date64Type: ...
@property
def value(self) -> int: ...
class TimestampScalar(Scalar):
@property
def type(self) -> TimestampType: ...
@property
def value(self) -> int: ...
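
These mirror pyarrow's real exception hierarchy, so code typed against the stub can catch either the Arrow class or its builtin base. A sketch (illustrative only):

import pyarrow as pa

try:
    # Casting a non-numeric string to int64 raises ArrowInvalid at runtime.
    pa.array(["not a number"]).cast(pa.int64())
except pa.lib.ArrowInvalid as exc:  # also caught by `except ValueError`
    print(type(exc).__name__, exc)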

60  stubs/pyarrow/parquet.pyi  Normal file

@@ -0,0 +1,60 @@
from __future__ import annotations
from typing import Any, BinaryIO, Literal
from . import NativeFile, Schema, Table
from .compute import Expression
from .dataset import Partitioning
from .fs import FileSystem
class FileMetaData: ...
def read_table(
source: str | NativeFile | BinaryIO,
*,
columns: list | None = None,
use_threads: bool = True,
metadata: FileMetaData | None = None,
schema: Schema | None = None,
use_pandas_metadata: bool = False,
read_dictionary: list | None = None,
memory_map: bool = False,
buffer_size: int = 0,
partitioning: Partitioning | str | list[str] = "hive",
filesystem: FileSystem | None = None,
filters: Expression | list[tuple] | list[list[tuple]] | None = None,
use_legacy_dataset: bool = False,
ignore_prefixes: list | None = None,
pre_buffer: bool = True,
coerce_int96_timestamp_unit: str | None = None,
decryption_properties: Any | None = None,
thrift_string_size_limit: int | None = None,
thrift_container_size_limit: int | None = None,
) -> Table: ...
def write_table(
table: Table,
where: str | NativeFile,
row_group_size: int | None = None,
version: Literal["1.0", "2.4", "2.6"] = "2.6",
use_dictionary: bool | list = True,
compression: Literal["none", "snappy", "gzip", "brotli", "lz4", "zstd"]
| dict[str, Literal["none", "snappy", "gzip", "brotli", "lz4", "zstd"]] = "snappy",
write_statistics: bool | list = True,
use_deprecated_int96_timestamps: bool | None = None,
coerce_timestamps: str | None = None,
allow_truncated_timestamps: bool = False,
data_page_size: int | None = None,
flavor: Literal["spark"] | None = None,
filesystem: FileSystem | None = None,
compression_level: int | dict | None = None,
use_byte_stream_split: bool | list = False,
column_encoding: str | dict | None = None,
data_page_version: Literal["1.0", "2.0"] = "1.0",
use_compliant_nested_type: bool = True,
encryption_properties: Any | None = None,
write_batch_size: int | None = None,
dictionary_pagesize_limit: int | None = None,
store_schema: bool = True,
write_page_index: bool = False,
**kwargs: Any,
) -> None: ...
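
A write/read round trip against these signatures (a sketch; the file path is a placeholder):

import pyarrow as pa
import pyarrow.parquet as pq

tbl = pa.table({"id": [1, 2, 3], "name": ["a", "b", "c"]})

# compression takes one of the literals in the stub; columns narrows the read.
pq.write_table(tbl, "/tmp/example.parquet", compression="zstd")  # placeholder path
back = pq.read_table("/tmp/example.parquet", columns=["id"])
assert back.num_rows == 3 and back.column_names == ["id"]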

27  stubs/pyarrow/types.pyi  Normal file

@@ -0,0 +1,27 @@
from __future__ import annotations
from . import DataType, Date32Type, Date64Type, TimestampType
def is_null(t: DataType) -> bool: ...
def is_struct(t: DataType) -> bool: ...
def is_boolean(t: DataType) -> bool: ...
def is_integer(t: DataType) -> bool: ...
def is_floating(t: DataType) -> bool: ...
def is_decimal(t: DataType) -> bool: ...
def is_temporal(t: DataType) -> bool: ...
def is_date(t: DataType) -> bool: ...
def is_date32(t: DataType) -> bool:
    return isinstance(t, Date32Type)
def is_date64(t: DataType) -> bool:
    return isinstance(t, Date64Type)
def is_time(t: DataType) -> bool: ...
def is_timestamp(t: DataType) -> bool:
    return isinstance(t, TimestampType)
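
The predicates are plain boolean checks on a DataType, e.g. (illustrative sketch):

import pyarrow as pa
import pyarrow.types as types

t = pa.timestamp("us", tz="UTC")
print(types.is_timestamp(t))  # True
print(types.is_temporal(t))   # True
print(types.is_date32(t))     # False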