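# Interface-only stub declarations covering the pyarrow names used here
# (data types, Array/ChunkedArray, RecordBatch/Table and their constructors).
# Every body is elided with `...`; only the signatures are meaningful.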
from __future__ import annotations

from typing import Any, Iterable, Iterator, Literal, Mapping, Sequence, Type, TypeVar

import pandas as pd

from .compute import CastOptions
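
# Arrow data type classes and the factory functions that construct them.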
class DataType: ...
class Date32Type(DataType): ...
class Date64Type(DataType): ...
class TimestampType(DataType): ...

def string() -> DataType: ...
def null() -> DataType: ...
def bool_() -> DataType: ...
def int8() -> DataType: ...
def int16() -> DataType: ...
def int32() -> DataType: ...
def int64() -> DataType: ...
def uint8() -> DataType: ...
def uint16() -> DataType: ...
def uint32() -> DataType: ...
def uint64() -> DataType: ...
def float16() -> DataType: ...
def float32() -> DataType: ...
def float64() -> DataType: ...
def date32() -> DataType: ...
def date64() -> DataType: ...
def binary(length: int = -1) -> DataType: ...
def large_binary() -> DataType: ...
def large_string() -> DataType: ...
def month_day_nano_interval() -> DataType: ...
def time32(unit: Literal["s", "ms", "us", "ns"]) -> DataType: ...
def time64(unit: Literal["s", "ms", "us", "ns"]) -> DataType: ...
def timestamp(
    unit: Literal["s", "ms", "us", "ns"], tz: str | None = None
) -> DataType: ...
def duration(unit: Literal["s", "ms", "us", "ns"]) -> DataType: ...
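
# Opaque placeholder classes; only their identities are needed for annotations.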
class MemoryPool: ...
class Schema: ...
class Field: ...
class NativeFile: ...
class MonthDayNano: ...
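
# A Scalar wraps a single value; as_py() converts it to a plain Python object.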
class Scalar:
    def as_py(self) -> Any: ...
    @property
    def type(self) -> DataType: ...
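
# `A` lets _PandasConvertible methods return the caller's concrete subclass.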
A = TypeVar("A", bound="_PandasConvertible")

class _PandasConvertible:
    @property
    def type(self) -> DataType: ...  # noqa: A003
    def cast(
        self: A,
        target_type: DataType | None = None,
        safe: bool = True,
        options: CastOptions | None = None,
    ) -> A: ...
    def __getitem__(self, index: int) -> Scalar: ...
    def __iter__(self) -> Any: ...
    def to_pylist(self) -> list[Any]: ...
    def fill_null(self: A, fill_value: Any) -> A: ...
    def drop_null(self: A) -> A: ...

class Array(_PandasConvertible): ...
class ChunkedArray(_PandasConvertible): ...

class StructArray(Array):
    def flatten(self, memory_pool: MemoryPool | None = None) -> list[Array]: ...
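
# Shared tabular interface; `T` ties the classmethod constructors and the
# fluent methods to the concrete subclass being used.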
T = TypeVar("T", bound="_Tabular")

class _Tabular:
    @classmethod
    def from_arrays(
        cls: Type[T],
        arrays: list[_PandasConvertible],
        names: list[str] | None = None,
        schema: Schema | None = None,
        metadata: Mapping | None = None,
    ) -> T: ...
    @classmethod
    def from_pydict(
        cls: Type[T],
        mapping: Mapping,
        schema: Schema | None = None,
        metadata: Mapping | None = None,
    ) -> T: ...
    def __getitem__(self, index: int) -> _PandasConvertible: ...
    def __len__(self) -> int: ...
    @property
    def column_names(self) -> list[str]: ...
    @property
    def columns(self) -> list[_PandasConvertible]: ...
    @property
    def num_rows(self) -> int: ...
    @property
    def num_columns(self) -> int: ...
    @property
    def schema(self) -> Schema: ...
    def append_column(
        self: T, field_: str | Field, column: Array | ChunkedArray
    ) -> T: ...
    def column(self, i: int | str) -> _PandasConvertible: ...
    def equals(self: T, other: T, check_metadata: bool = False) -> bool: ...
    def itercolumns(self) -> Iterator[_PandasConvertible]: ...
    def rename_columns(self: T, names: list[str]) -> T: ...
    def select(self: T, columns: Sequence[str | int]) -> T: ...
    def set_column(
        self: T, i: int, field_: str | Field, column: Array | ChunkedArray
    ) -> T: ...
    def slice(  # noqa: A003
        self: T,
        offset: int = 0,
        length: int | None = None,
    ) -> T: ...
    def sort_by(
        self: T,
        sorting: str | list[tuple[str, Literal["ascending", "descending"]]],
        **kwargs: Any,
    ) -> T: ...
    def to_pylist(self) -> list[dict[str, Any]]: ...
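
# Concrete tabular containers; Table also converts to and from record batches.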
class RecordBatch(_Tabular): ...

class Table(_Tabular):
    @classmethod
    def from_batches(
        cls,
        batches: Iterable[RecordBatch],
        schema: Schema | None = None,
    ) -> "Table": ...
    def to_batches(self) -> list[RecordBatch]: ...
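
# Module-level constructors (the stubs' counterparts of pa.scalar, pa.array,
# pa.nulls and pa.table).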
def scalar(value: Any, type: DataType) -> Scalar: ...  # noqa: A002
def array(
    obj: Iterable,
    type: DataType | None = None,  # noqa: A002
    mask: Array | None = None,
    size: int | None = None,
    from_pandas: bool | None = None,
    safe: bool = True,
    memory_pool: MemoryPool | None = None,
) -> Array | ChunkedArray: ...
def concat_arrays(
    arrays: Iterable[Array], memory_pool: MemoryPool | None = None
) -> Array: ...
def nulls(
    size: int,
    type: DataType | None = None,  # noqa: A002
    memory_pool: MemoryPool | None = None,
) -> Array: ...
def table(
    data: pd.DataFrame
    | Mapping[str, _PandasConvertible | list]
    | list[_PandasConvertible],
    names: list[str] | None = None,
    schema: Schema | None = None,
    metadata: Mapping | None = None,
    nthreads: int | None = None,
) -> Table: ...
def set_timezone_db_path(path: str) -> None: ...