
Adding upstream version 0.12.0.

Signed-off-by: Daniel Baumann <daniel@debian.org>
Daniel Baumann 2025-02-24 10:57:24 +01:00
parent d887bee5ca
commit 148efc9122
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
69 changed files with 12923 additions and 0 deletions

.github/dependabot.yml

@@ -0,0 +1,15 @@
# To get started with Dependabot version updates, you'll need to specify which
# package ecosystems to update and where the package manifests are located.
# Please see the documentation for all configuration options:
# https://help.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
version: 2
updates:
- package-ecosystem: "pip" # See documentation for possible values
directory: "/" # Location of package manifests
schedule:
interval: "monthly"
- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "monthly"

.github/workflows/publish.yml

@@ -0,0 +1,47 @@
name: Build and Publish Package
on:
pull_request:
branches:
- main
types:
- closed
jobs:
publish-package:
if: ${{ github.event.pull_request.merged == true && startsWith(github.event.pull_request.head.ref, 'release/v') }}
runs-on: ubuntu-latest
steps:
- name: Check out repo main branch
uses: actions/checkout@v4
with:
ref: main
- name: Set up Python 3.10
uses: actions/setup-python@v5
with:
python-version: "3.10"
- name: Install Poetry
uses: snok/install-poetry@v1
with:
version: 1.6.1
- name: Configure poetry
run: poetry config --no-interaction pypi-token.pypi ${{ secrets.FASTDATATABLE_PYPI_TOKEN }}
- name: Get project version
id: project_version
run: echo "project_version=$(poetry version --short)" >> $GITHUB_OUTPUT
- name: Build package
run: poetry build --no-interaction
- name: Publish package to PyPI
run: poetry publish --no-interaction
- name: Create a GitHub Release
uses: softprops/action-gh-release@v2
with:
tag_name: v${{ steps.project_version.outputs.project_version }}
target_commitish: main
token: ${{ secrets.FASTDATATABLE_RELEASE_TOKEN }}
body_path: CHANGELOG.md
files: |
LICENSE
dist/*textual_fastdatatable*.whl
dist/*textual_fastdatatable*.tar.gz

.github/workflows/release.yml

@@ -0,0 +1,58 @@
name: Create Release Branch
on:
workflow_dispatch:
inputs:
newVersion:
description: A version number for this release (e.g., "0.1.0")
required: true
jobs:
prepare-release:
runs-on: ubuntu-latest
permissions:
contents: write
pull-requests: write
steps:
- name: Check out repo main branch
uses: actions/checkout@v4
with:
ref: main
- name: Set up Python 3.10
uses: actions/setup-python@v5
with:
python-version: "3.10"
- name: Install Poetry
uses: snok/install-poetry@v1
with:
version: 1.6.1
- name: Create release branch
run: |
git checkout -b release/v${{ github.event.inputs.newVersion }}
git push --set-upstream origin release/v${{ github.event.inputs.newVersion }}
- name: Bump version
run: poetry version ${{ github.event.inputs.newVersion }} --no-interaction
- name: Ensure package can be built
run: poetry build --no-interaction
- name: Update CHANGELOG
uses: thomaseizinger/keep-a-changelog-new-release@v3
with:
version: ${{ github.event.inputs.newVersion }}
- name: Commit Changes
uses: stefanzweifel/git-auto-commit-action@v5
with:
commit_message: Bumps version to ${{ github.event.inputs.newVersion }}
- name: Create pull request into main
uses: thomaseizinger/create-pull-request@1.3.1
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
head: release/v${{ github.event.inputs.newVersion }}
base: main
title: v${{ github.event.inputs.newVersion }}
body: >
This PR was automatically generated. It bumps the version number
in pyproject.toml and updates CHANGELOG.md. You may have to close
this PR and reopen it to get the required checks to run.

.github/workflows/static.yml

@@ -0,0 +1,53 @@
name: "Perform Static Analysis"
on:
pull_request:
# will cancel previous workflows triggered by the same event and for the same ref for PRs or same SHA otherwise
concurrency:
group: ${{ github.workflow }}-${{ github.event_name }}-${{ contains(github.event_name, 'pull_request') && github.event.pull_request.head.ref || github.sha }}
cancel-in-progress: true
jobs:
static:
name: Static Analysis - 3.11
runs-on: ubuntu-latest
steps:
- name: Check out Repo
uses: actions/checkout@v4
with:
persist-credentials: false
- name: Set up Python 3.11
uses: actions/setup-python@v5
id: setup-python
with:
python-version: "3.11"
- name: Load cached Poetry installation
id: cached-poetry-install
uses: actions/cache@v4
with:
path: ~/.local
key: poetry-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-0
- name: Install Poetry
if: steps.cached-poetry-install.outputs.cache-hit != 'true'
uses: snok/install-poetry@v1
with:
version: 1.4.2
virtualenvs-create: true
virtualenvs-in-project: true
installer-parallel: true
- name: Load cached venv
id: cached-poetry-dependencies
uses: actions/cache@v4
with:
path: .venv
key: static-venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}
- name: Install python dependencies
if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
run: poetry install --sync --no-interaction --without dev
- name: Run analysis
run: |
source .venv/bin/activate
ruff format --check
ruff check .
mypy

.github/workflows/test.yml

@@ -0,0 +1,88 @@
name: Test
on:
push:
branches: [ main ]
pull_request:
# will cancel previous workflows triggered by the same event and for the same ref for PRs or same SHA otherwise
concurrency:
group: ${{ github.workflow }}-${{ github.event_name }}-${{ contains(github.event_name, 'pull_request') && github.event.pull_request.head.ref || github.sha }}
cancel-in-progress: true
defaults:
run:
shell: bash
jobs:
test-windows:
name: Windows - 3.10
runs-on: Windows-latest
steps:
- name: Check out Repo
uses: actions/checkout@v4
with:
persist-credentials: false
- name: Set up Python 3.10
uses: actions/setup-python@v5
with:
python-version: "3.10"
- name: Install Poetry
uses: snok/install-poetry@v1
with:
version: 1.4.2
- name: Install python dependencies
run: poetry install --sync --no-interaction --only main,test
- name: Run tests
run: poetry run pytest
test:
name: ${{ matrix.os }} - ${{ matrix.py }}
runs-on: ${{ matrix.os }}-latest
strategy:
fail-fast: false
matrix:
os:
- ubuntu
- MacOs
py:
- "3.11"
- "3.10"
- "3.9"
steps:
- name: Check out Repo
uses: actions/checkout@v4
with:
persist-credentials: false
- name: Set up Python ${{ matrix.py }}
id: setup-python
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.py }}
- name: Load cached Poetry installation
id: cached-poetry-install
uses: actions/cache@v4
with:
path: ~/.local
key: poetry-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}
- name: Install Poetry
if: steps.cached-poetry-install.outputs.cache-hit != 'true'
uses: snok/install-poetry@v1
with:
version: 1.4.2
virtualenvs-create: true
virtualenvs-in-project: true
installer-parallel: true
- name: Load cached venv
id: cached-poetry-dependencies
uses: actions/cache@v4
with:
path: .venv
key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}
- name: Install python dependencies
if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
run: poetry install --sync --no-interaction --only main,test
- name: Run tests
run: |
source .venv/bin/activate
pytest

.gitignore

@@ -0,0 +1,167 @@
.vscode
snapshot_report.html
profile*.html
results.md
Pipfile
.python-version
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

.pre-commit-config.yaml

@@ -0,0 +1,28 @@
repos:
- repo: https://github.com/charliermarsh/ruff-pre-commit
rev: v0.5.1
hooks:
- id: ruff-format
- id: ruff
args: [ --fix, --exit-non-zero-on-fix ]
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.11.0
hooks:
- id: mypy
additional_dependencies:
- textual>=0.72.0
- pytest
- pyarrow-stubs
- pandas-stubs
- polars
exclude: "tests/snapshot_tests/"
args:
- "--disallow-untyped-calls"
- "--disallow-untyped-defs"
- "--disallow-incomplete-defs"
- "--strict-optional"
- "--warn-return-any"
- "--warn-no-return"
- "--warn-redundant-casts"
- "--no-warn-unused-ignores"
- "--allow-untyped-decorators"

.vscode/settings.json (new empty file)

CHANGELOG.md

@@ -0,0 +1,181 @@
# Changelog
All notable changes to this project will be documented in this file.
## [Unreleased]
## [0.12.0] - 2025-02-06
- Catches overflow errors when casting Arrow temporal types to Python dates and datetimes, and substitutes date.max/min and datetime.max/min instead of None.
- Formats date.max/min and datetime.max/min with an infinity symbol (`∞`) when rendering cells with those values.
## [0.11.0] - 2024-12-19
- Drops support for Python 3.8
- Adds support for Python 3.13
## [0.10.0] - 2024-10-31
- Adds an optional parameter to DataTable to disable rendering of string data as Rich Markup.
- Fixes a bug where None could be cast to a string and displayed as "None" ([tconbeer/harlequin#658](https://github.com/tconbeer/harlequin/issues/658))
## [0.9.0] - 2024-07-23
- Adds a PolarsBackend implementation of DataTableBackend. You must have `polars` installed to use the PolarsBackend. You can install it using the `polars` extra for this package.
- Fixes a crash from the ArrowBackend when attempting to instantiate negative datetimes after a timezone conversion.
## [0.8.0] - 2024-07-10
- Fixes a crash when cell contents contained bad Rich Markdown ([tconbeer/harlequin#569](https://github.com/tconbeer/harlequin/issues/569)).
- Improves the appearance of data tooltips.
## [0.7.1] - 2024-02-09
- Adds a `backend.source_data` property to expose the underlying Arrow table, before slicing.
## [0.7.0] - 2024-02-07
### Breaking Changes
- Removes the NumpyBackend ([#78](https://github.com/tconbeer/textual-fastdatatable/issues/78)).
### Features
- Values are now formatted based on their type. Numbers have separators based on the locale, and numbers, dates/times/etc., and bools are right-aligned ([#70](https://github.com/tconbeer/textual-fastdatatable/issues/70)).
### Bug Fixes
- Fixes bug that caused either a crash or an empty table from initializing a table `from_records` or `from_pydict` with mixed (widening or narrowing) types in one column.
## [0.6.3] - 2024-01-09
### Bug Fixes
- Widens acceptable types for create_backend to accept a sequence of any iterable, not just iterables that are instances of typing.Iterable.
## [0.6.2] - 2024-01-08
### Bug Fixes
- Adds the tzdata package as a dependency for Windows installs, since Windows does not ship with a built-in tzdata database.
## [0.6.1] - 2024-01-05
### Bug Fixes
- Fixes the behavior of <kbd>tab</kbd> and <kbd>shift+tab</kbd> to cycle to the next/prev row if at the end/start of a row or table.
- Fixes a crash from pressing <kbd>ctrl+c</kbd> when the cursor type is column.
## [0.6.0] - 2024-01-05
### Features
- Adds keybindings for navigating the cursor in the data table. <kbd>ctrl+right/left/up/down/home/end</kbd> (with <kbd>shift</kbd> variants), <kbd>tab</kbd>, <kbd>shift+tab</kbd>, <kbd>ctrl+a</kbd> now all do roughly what they do in Excel (if the cursor type is `range`).
## [0.5.1] - 2024-01-05
### Bug Fixes
- Adds a dependency on pytz for Python <3.9 for timezone support.
- Fixes a bug where Arrow crashes while casting timestamptz to string ([tconbeer/harlequin#382](https://github.com/tconbeer/harlequin/issues/382)).
### Performance
- Vectorizes fallback string casting for datatypes unsupported by `pc.cast` ([#8](https://github.com/tconbeer/textual-fastdatatable/issues/8))
## [0.5.0] - 2023-12-21
### Features
- Adds a `range` cursor type that will highlight a range of selected cells, like Excel.
- <kbd>ctrl+c</kbd> now posts a `SelectionCopied` message, with a `values` attribute that contains a list of tuples of values from the data table.
- Adds a `max_column_content_width` parameter to DataTable. If set, DataTable will truncate values longer than the width, but show the full value in a tooltip on hover.
## [0.4.1] - 2023-12-14
- Fixes a crash caused by calling `create_backend` with an empty sequence.
## [0.4.0] - 2023-11-14
### Breaking API Changes
- When calling `create_backend` with a sequence of iterables, the default behavior now assumes the data does not contain headers. You can restore the old behavior with `create_backend(has_headers=True)`.
- When calling `DataTable(data=...)` with a sequence of iterables, the first row is treated as a header only if `column_labels` is not provided.
## [0.3.0] - 2023-11-11
### Features
- The DataTable now accepts a `max_rows` kwarg; if provided, backends will only store the first `max_rows` and the DataTable will only present `max_rows`. The original row count of the data source is available as DataTable().source_row_count ([tconbeer/harlequin#281](https://github.com/tconbeer/harlequin/issues/281)).
### API Changes
- Backends must now accept a `max_rows` kwarg on initialization.
## [0.2.1] - 2023-11-10
### Bug Fixes
- Tables with the ArrowBackend no longer display incorrect output when column labels are duplicated ([#26](https://github.com/tconbeer/textual-fastdatatable/issues/26)).
## [0.2.0] - 2023-11-08
### Features
- Adds a `null_rep: str` argument when initializing the data table; this string will be used to replace missing data.
- Adds a `NumpyBackend` that uses Numpy Record Arrays; this backend is marginally slower than the `ArrowBackend` in most scenarios ([#23](https://github.com/tconbeer/textual-fastdatatable/issues/23)).
### Bug Fixes
- Fixes a crash when using `ArrowBackend.from_records(has_header=False)`.
### Performance
- Drastically improves performance for tables that are much wider than the viewport ([#12](https://github.com/tconbeer/textual-fastdatatable/issues/12)).
### Benchmarks
- Improves benchmarks to exclude data load times, disable garbage collection, and include more information about first paint and scroll performance.
## [0.1.4] - 2023-11-06
- Fixes a crash when computing the widths of columns with no rows ([#19](https://github.com/tconbeer/textual-fastdatatable/issues/19)).
## [0.1.3] - 2023-10-09
- Fixes a crash when creating a column from a null or complex type.
## [0.1.2] - 2023-10-02
## [0.1.1] - 2023-09-29
- Fixes a crash when rows were added to an empty table.
## [0.1.0] - 2023-09-29
- Initial release. Adds DataTable and ArrowBackend, which is 1000x faster than the built-in DataTable for datasets of 500k records or more.
[unreleased]: https://github.com/tconbeer/textual-fastdatatable/compare/0.12.0...HEAD
[0.12.0]: https://github.com/tconbeer/textual-fastdatatable/compare/0.11.0...0.12.0
[0.11.0]: https://github.com/tconbeer/textual-fastdatatable/compare/0.10.0...0.11.0
[0.10.0]: https://github.com/tconbeer/textual-fastdatatable/compare/0.9.0...0.10.0
[0.9.0]: https://github.com/tconbeer/textual-fastdatatable/compare/0.8.0...0.9.0
[0.8.0]: https://github.com/tconbeer/textual-fastdatatable/compare/0.7.1...0.8.0
[0.7.1]: https://github.com/tconbeer/textual-fastdatatable/compare/0.7.0...0.7.1
[0.7.0]: https://github.com/tconbeer/textual-fastdatatable/compare/0.6.3...0.7.0
[0.6.3]: https://github.com/tconbeer/textual-fastdatatable/compare/0.6.2...0.6.3
[0.6.2]: https://github.com/tconbeer/textual-fastdatatable/compare/0.6.1...0.6.2
[0.6.1]: https://github.com/tconbeer/textual-fastdatatable/compare/0.6.0...0.6.1
[0.6.0]: https://github.com/tconbeer/textual-fastdatatable/compare/0.5.1...0.6.0
[0.5.1]: https://github.com/tconbeer/textual-fastdatatable/compare/0.5.0...0.5.1
[0.5.0]: https://github.com/tconbeer/textual-fastdatatable/compare/0.4.1...0.5.0
[0.4.1]: https://github.com/tconbeer/textual-fastdatatable/compare/0.4.0...0.4.1
[0.4.0]: https://github.com/tconbeer/textual-fastdatatable/compare/0.3.0...0.4.0
[0.3.0]: https://github.com/tconbeer/textual-fastdatatable/compare/0.2.1...0.3.0
[0.2.1]: https://github.com/tconbeer/textual-fastdatatable/compare/0.2.0...0.2.1
[0.2.0]: https://github.com/tconbeer/textual-fastdatatable/compare/0.1.4...0.2.0
[0.1.4]: https://github.com/tconbeer/textual-fastdatatable/compare/0.1.3...0.1.4
[0.1.3]: https://github.com/tconbeer/textual-fastdatatable/compare/0.1.2...0.1.3
[0.1.2]: https://github.com/tconbeer/textual-fastdatatable/compare/0.1.1...0.1.2
[0.1.1]: https://github.com/tconbeer/textual-fastdatatable/compare/0.1.0...0.1.1
[0.1.0]: https://github.com/tconbeer/textual-fastdatatable/compare/4b9f99175d34f693dd0d4198c39d72f89caf6479...0.1.0

LICENSE

@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2023 Ted Conbeer
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

Makefile

@@ -0,0 +1,24 @@
.PHONY: check
check:
ruff format .
pytest
ruff check . --fix
mypy
.PHONY: lint
lint:
ruff format .
ruff check . --fix
mypy
.PHONY: serve
serve:
textual run --dev -c python -m textual_fastdatatable
.PHONY: profile
profile:
pyinstrument -r html -o profile.html "src/scripts/run_arrow_wide.py"
.PHONY: benchmark
benchmark:
python src/scripts/benchmark.py > /dev/null

README.md

@@ -0,0 +1,131 @@
# textual-fastdatatable
A performance-focused reimplementation of Textual's DataTable widget, with a pluggable data storage backend.
Textual's built-in DataTable widget is beautiful and powerful, but it can be slow to load large datasets.
Here are some benchmarks on my relatively weak laptop. For each benchmark, we initialize a Textual App that
loads a dataset from a parquet file and mounts a data table; it then scrolls around the table
(10 pagedowns and 15 right arrows).
For the built-in table and the others marked "from Records", the data is loaded into memory before the timer
is started; for the "Arrow from Parquet" backend, the timer is started immediately.
The times in each column represent the time to the first paint of the table, and the time after scrolling
is completed (we wait until the table is fully rendered after each scroll):
Records | Built-In DataTable | FastDataTable (Arrow from Parquet) | FastDataTable (Arrow from Records) | FastDataTable (Numpy from Records)
--------|--------|--------|--------|--------
lap_times_100.parquet | 0.019s / 1.716s | 0.012s / 1.724s | 0.011s / 1.700s | 0.011s / 1.688s
lap_times_1000.parquet | 0.103s / 1.931s | 0.011s / 1.859s | 0.011s / 1.799s | 0.015s / 1.848s
lap_times_10000.parquet | 0.977s / 2.824s | 0.013s / 1.834s | 0.016s / 1.812s | 0.078s / 1.869s
lap_times_100000.parquet | 11.773s / 13.770s | 0.025s / 1.790s | 0.156s / 1.824s | 0.567s / 2.347s
lap_times_538121.parquet | 62.960s / 65.760s | 0.077s / 1.803s | 0.379s / 2.234s | 3.324s / 5.031s
wide_10000.parquet | 5.110s / 10.539s | 0.024s / 3.373s | 0.042s / 3.278s | 0.369s / 3.461s
wide_100000.parquet | 51.144s / 56.604s | 0.054s / 3.294s | 0.429s / 3.642s | 3.628s / 6.732s
**NB:** FastDataTable currently does not support rows with a height of more than one line. See below for
more limitations relative to the built-in DataTable.
## Installation
```bash
pip install textual-fastdatatable
```
## Usage
If you already have data in Apache Arrow or another common table format:
```py
from textual_fastdatatable import DataTable
data_table = DataTable(data = my_data)
```
The currently supported types are:
```py
AutoBackendType = Union[
pa.Table,
pa.RecordBatch,
Path, # to parquet only
str, # path to parquet only
Sequence[Iterable[Any]],
Mapping[str, Sequence[Any]],
]
```
To override the column labels and widths supplied by the backend:
```py
from textual_fastdatatable import DataTable
data_table = DataTable(data = my_data, column_labels=["Supports", "[red]Console[/]", "Markup!"], column_widths=[10, 5, None])
```
You can also pass in a `backend` manually (if you want more control or want to plug in your own).
```py
from textual_fastdatatable import ArrowBackend, DataTable, create_backend
backend = create_backend(my_data)
backend = ArrowBackend(my_arrow_table)
# from python dictionary in the form key: col_values
backend = ArrowBackend.from_pydict(
{
"col one": [1, 2, 3 ,4],
"col two": ["a", "b", "c", "d"],
}
)
# from a list of tuples or another sequence of iterables
backend = ArrowBackend.from_records(
[
("col one", "col two"),
(1, "a"),
(2, "b"),
(3, "c"),
(4, "d"),
]
)
# from a path to a Parquet file:
backend = ArrowBackend.from_parquet("path/to/file.parquet")
```
## Limitations and Caveats
The `DataTable` does not currently support rows with a height of more than one line. Only the first line of each row will be displayed.
The `DataTable` does not currently support row labels.
The `ArrowBackend` is optimized to be fast for large, immutable datasets. Mutating the data,
especially adding or removing rows, may be slow.
The `ArrowBackend` cannot be initialized without data; the `DataTable`, however, can, either with or without `column_labels`.
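For example, this renders an empty table with only column headers (a minimal sketch; the labels are illustrative):
```py
from textual_fastdatatable import DataTable
# no data and no backend: the table shows just these two column headers
data_table = DataTable(column_labels=["one", "two"])
```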
The `ArrowBackend` cannot store arbitrary Python objects or Rich Renderables as values. It may widen types to strings unnecessarily.
## Additional Features
### Copying Data from the Table
`ctrl+c` will post a `SelectionCopied` message whose `values` attribute contains a list of tuples of the values selected by the cursor. To use it, initialize the table with `cursor_type="range"` from an app that does NOT inherit bindings.
```py
from textual.app import App, ComposeResult
from textual_fastdatatable import ArrowBackend, DataTable
class TableApp(App, inherit_bindings=False):
BINDINGS = [("ctrl+q", "quit", "Quit")]
def compose(self) -> ComposeResult:
backend = ArrowBackend.from_parquet("./tests/data/lap_times_538121.parquet")
yield DataTable(backend=backend, cursor_type="range")
if __name__ == "__main__":
app = TableApp()
app.run()
```
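To consume the copied values, handle the message in your app. A minimal sketch, assuming the message class is `DataTable.SelectionCopied` (so Textual's handler-naming convention yields the method name below) and a Textual version that provides `App.copy_to_clipboard`:
```py
from textual.app import App, ComposeResult
from textual_fastdatatable import ArrowBackend, DataTable

class TableApp(App, inherit_bindings=False):
    BINDINGS = [("ctrl+q", "quit", "Quit")]

    def compose(self) -> ComposeResult:
        backend = ArrowBackend.from_parquet("./tests/data/lap_times_538121.parquet")
        yield DataTable(backend=backend, cursor_type="range")

    def on_data_table_selection_copied(self, message: DataTable.SelectionCopied) -> None:
        # join the selected values into tab-separated lines and place them
        # on the system clipboard
        text = "\n".join("\t".join(str(v) for v in row) for row in message.values)
        self.copy_to_clipboard(text)
```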
### Truncating long values
The `DataTable` will automatically calculate column widths; if you set a `max_column_content_width` at initialization, it will truncate any long values at that width; the full value will be visible on hover in a tooltip (and the full value will always be copied to the clipboard).
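A minimal sketch (the width of `20` and the `my_data` source are illustrative):
```py
from textual_fastdatatable import DataTable
# cells wider than 20 characters are truncated in the table; hovering a
# truncated cell shows the full value in a tooltip
data_table = DataTable(data=my_data, max_column_content_width=20)
```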

poetry.lock
File diff suppressed because it is too large.

pyproject.toml

@@ -0,0 +1,81 @@
[tool.poetry]
name = "textual-fastdatatable"
version = "0.12.0"
description = "A performance-focused reimplementation of Textual's DataTable widget, with a pluggable data storage backend."
authors = ["Ted Conbeer <tconbeer@users.noreply.github.com>"]
license = "MIT"
readme = "README.md"
packages = [
{ include = "textual_fastdatatable", from = "src" },
]
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
[tool.poetry.dependencies]
python = ">=3.9,<3.14"
textual = ">=0.89.1"
pyarrow = ">=16.1.0"
polars = { version = ">=0.20.0", optional = true }
tzdata = { version = ">=2023", markers = "sys_platform == 'win32'" } # arrow timestamptz support on windows
[tool.poetry.extras]
polars = ["polars"]
[tool.poetry.group.dev.dependencies]
pre-commit = "^3.3.1"
textual-dev = "^1.0.1"
pandas = "^2.1.1"
numpy = "^1"
pyinstrument = "^5"
[tool.poetry.group.static.dependencies]
ruff = "^0.5"
mypy = "^1.10.0"
pandas-stubs = "^2.1.1"
[tool.poetry.group.test.dependencies]
pytest = "^7.3.1"
pytest-asyncio = ">=0.21,<0.24"
pytest-textual-snapshot = ">=0.4.0"
polars = ">=0.20.0"
[tool.ruff]
target-version = "py39"
[tool.ruff.lint]
select = ["A", "B", "E", "F", "I"]
[tool.mypy]
python_version = "3.9"
files = [
"src/**/*.py",
"tests/unit_tests/**/*.py"
]
mypy_path = "src:stubs"
show_column_numbers = true
# show error messages from unrelated files
follow_imports = "normal"
# be strict
disallow_untyped_calls = true
disallow_untyped_defs = true
check_untyped_defs = true
disallow_untyped_decorators = true
disallow_incomplete_defs = true
disallow_subclassing_any = true
strict_optional = true
warn_return_any = true
warn_no_return = true
warn_redundant_casts = true
warn_unused_ignores = true
warn_unused_configs = true
no_implicit_reexport = true
strict_equality = true

src/scripts/benchmark.py

@@ -0,0 +1,157 @@
from __future__ import annotations
import gc
from pathlib import Path
from time import perf_counter
import pandas as pd
import polars as pl
from textual.app import App, ComposeResult
from textual.driver import Driver
from textual.pilot import Pilot
from textual.types import CSSPathType
from textual.widgets import DataTable as BuiltinDataTable
from textual_fastdatatable import ArrowBackend
from textual_fastdatatable import DataTable as FastDataTable
from textual_fastdatatable.backend import PolarsBackend
BENCHMARK_DATA = Path(__file__).parent.parent.parent / "tests" / "data"
async def scroller(pilot: Pilot) -> None:
first_paint = perf_counter() - pilot.app.start # type: ignore
for _ in range(5):
await pilot.press("pagedown")
for _ in range(15):
await pilot.press("right")
for _ in range(5):
await pilot.press("pagedown")
elapsed = perf_counter() - pilot.app.start # type: ignore
pilot.app.exit(result=(first_paint, elapsed))
class BuiltinApp(App):
TITLE = "Built-In DataTable"
def __init__(
self,
data_path: Path,
driver_class: type[Driver] | None = None,
css_path: CSSPathType | None = None,
watch_css: bool = False,
):
super().__init__(driver_class, css_path, watch_css)
self.data_path = data_path
def compose(self) -> ComposeResult:
df = pd.read_parquet(self.data_path)
rows = [tuple(row) for row in df.itertuples(index=False)]
self.start = perf_counter()
table: BuiltinDataTable = BuiltinDataTable()
table.add_columns(*[str(col) for col in df.columns])
for row in rows:
table.add_row(*row, height=1, label=None)
yield table
class ArrowBackendApp(App):
TITLE = "FastDataTable (Arrow from Parquet)"
def __init__(
self,
data_path: Path,
driver_class: type[Driver] | None = None,
css_path: CSSPathType | None = None,
watch_css: bool = False,
):
super().__init__(driver_class, css_path, watch_css)
self.data_path = data_path
def compose(self) -> ComposeResult:
self.start = perf_counter()
yield FastDataTable(data=self.data_path)
class ArrowBackendAppFromRecords(App):
TITLE = "FastDataTable (Arrow from Records)"
def __init__(
self,
data_path: Path,
driver_class: type[Driver] | None = None,
css_path: CSSPathType | None = None,
watch_css: bool = False,
):
super().__init__(driver_class, css_path, watch_css)
self.data_path = data_path
def compose(self) -> ComposeResult:
df = pd.read_parquet(self.data_path)
rows = [tuple(row) for row in df.itertuples(index=False)]
self.start = perf_counter()
backend = ArrowBackend.from_records(rows, has_header=False)
table = FastDataTable(
backend=backend, column_labels=[str(col) for col in df.columns]
)
yield table
class PolarsBackendApp(App):
TITLE = "FastDataTable (Polars from Parquet)"
def __init__(
self,
data_path: Path,
driver_class: type[Driver] | None = None,
css_path: CSSPathType | None = None,
watch_css: bool = False,
):
super().__init__(driver_class, css_path, watch_css)
self.data_path = data_path
def compose(self) -> ComposeResult:
self.start = perf_counter()
yield FastDataTable(
data=PolarsBackend.from_dataframe(pl.read_parquet(self.data_path))
)
if __name__ == "__main__":
app_defs = [
BuiltinApp,
ArrowBackendApp,
ArrowBackendAppFromRecords,
PolarsBackendApp,
]
bench = [
(f"lap_times_{n}.parquet", 3 if n <= 10000 else 1)
for n in [100, 1000, 10000, 100000, 538121]
]
bench.extend([(f"wide_{n}.parquet", 1) for n in [10000, 100000]])
with open("results.md", "w") as f:
print(
"Records |",
" | ".join([a.TITLE for a in app_defs]), # type: ignore
sep="",
file=f,
)
print("--------|", "|".join(["--------" for _ in app_defs]), sep="", file=f)
for p, tries in bench:
first_paint: list[list[float]] = [list() for _ in app_defs]
elapsed: list[list[float]] = [list() for _ in app_defs]
for i, app_cls in enumerate(app_defs):
for _ in range(tries):
app = app_cls(BENCHMARK_DATA / p)
gc.disable()
fp, el = app.run(headless=True, auto_pilot=scroller) # type: ignore
gc.collect()
first_paint[i].append(fp)
elapsed[i].append(el)
gc.enable()
avg_first_paint = [sum(app_times) / tries for app_times in first_paint]
avg_elapsed = [sum(app_times) / tries for app_times in elapsed]
formatted = [
f"{fp:7,.3f}s / {el:7,.3f}s"
for fp, el in zip(avg_first_paint, avg_elapsed)
]
print(f"{p} | {' | '.join(formatted)}", file=f)

src/scripts/run_arrow_wide.py

@@ -0,0 +1,32 @@
from __future__ import annotations
from pathlib import Path
from textual.app import App, ComposeResult
from textual.driver import Driver
from textual.types import CSSPathType
from textual_fastdatatable import DataTable
BENCHMARK_DATA = Path(__file__).parent.parent.parent / "tests" / "data"
class ArrowBackendApp(App):
TITLE = "FastDataTable (Arrow)"
def __init__(
self,
data_path: Path,
driver_class: type[Driver] | None = None,
css_path: CSSPathType | None = None,
watch_css: bool = False,
):
super().__init__(driver_class, css_path, watch_css)
self.data_path = data_path
def compose(self) -> ComposeResult:
yield DataTable(data=self.data_path)
if __name__ == "__main__":
app = ArrowBackendApp(data_path=BENCHMARK_DATA / "wide_100000.parquet")
app.run()


@@ -0,0 +1,39 @@
from __future__ import annotations
from pathlib import Path
import pandas as pd
from textual.app import App, ComposeResult
from textual.driver import Driver
from textual.types import CSSPathType
from textual.widgets import DataTable
BENCHMARK_DATA = Path(__file__).parent.parent.parent / "tests" / "data"
class BuiltinApp(App):
TITLE = "Built-In DataTable"
def __init__(
self,
data_path: Path,
driver_class: type[Driver] | None = None,
css_path: CSSPathType | None = None,
watch_css: bool = False,
):
super().__init__(driver_class, css_path, watch_css)
self.data_path = data_path
def compose(self) -> ComposeResult:
df = pd.read_parquet(self.data_path)
rows = [tuple(row) for row in df.itertuples(index=False)]
table: DataTable = DataTable()
table.add_columns(*[str(col) for col in df.columns])
for row in rows:
table.add_row(*row, height=1, label=None)
yield table
if __name__ == "__main__":
app = BuiltinApp(data_path=BENCHMARK_DATA / "wide_10000.parquet")
app.run()

src/textual_fastdatatable/__init__.py

@@ -0,0 +1,13 @@
from textual_fastdatatable.backend import (
ArrowBackend,
DataTableBackend,
create_backend,
)
from textual_fastdatatable.data_table import DataTable
__all__ = [
"DataTable",
"ArrowBackend",
"DataTableBackend",
"create_backend",
]

src/textual_fastdatatable/__main__.py

@@ -0,0 +1,19 @@
from textual.app import App, ComposeResult
from textual_fastdatatable import ArrowBackend, DataTable
class TableApp(App, inherit_bindings=False):
BINDINGS = [("ctrl+q", "quit", "Quit"), ("ctrl+d", "quit", "Quit")]
def compose(self) -> ComposeResult:
backend = ArrowBackend.from_parquet("./tests/data/wide_100000.parquet")
yield DataTable(backend=backend, cursor_type="range", fixed_columns=2)
if __name__ == "__main__":
import locale
locale.setlocale(locale.LC_ALL, "")
app = TableApp()
app.run()

src/textual_fastdatatable/backend.py

@@ -0,0 +1,706 @@
from __future__ import annotations
from abc import ABC, abstractmethod
from contextlib import suppress
from datetime import date, datetime
from pathlib import Path
from typing import (
Any,
Dict,
Generic,
Iterable,
Literal,
Mapping,
Sequence,
TypeVar,
)
import pyarrow as pa
import pyarrow.compute as pc
import pyarrow.lib as pal
import pyarrow.parquet as pq
import pyarrow.types as pt
from rich.console import Console
from textual_fastdatatable.formatter import measure_width
AutoBackendType = Any
try:
import polars as pl
import polars.datatypes as pld
except ImportError:
_HAS_POLARS = False
else:
_HAS_POLARS = True
def create_backend(
data: "AutoBackendType",
max_rows: int | None = None,
has_header: bool = False,
) -> DataTableBackend:
if isinstance(data, pa.Table):
return ArrowBackend(data, max_rows=max_rows)
if isinstance(data, pa.RecordBatch):
return ArrowBackend.from_batches(data, max_rows=max_rows)
if _HAS_POLARS and isinstance(data, pl.DataFrame):
return PolarsBackend.from_dataframe(data, max_rows=max_rows)
if isinstance(data, Path) or isinstance(data, str):
data = Path(data)
if data.suffix in [".pqt", ".parquet"]:
return ArrowBackend.from_parquet(data, max_rows=max_rows)
if _HAS_POLARS:
return PolarsBackend.from_file_path(
data, max_rows=max_rows, has_header=has_header
)
if isinstance(data, Sequence) and not data:
return ArrowBackend(pa.table([]), max_rows=max_rows)
if isinstance(data, Sequence) and _is_iterable(data[0]):
return ArrowBackend.from_records(data, max_rows=max_rows, has_header=has_header)
if (
isinstance(data, Mapping)
and isinstance(next(iter(data.keys())), str)
and isinstance(next(iter(data.values())), Sequence)
):
return ArrowBackend.from_pydict(data, max_rows=max_rows)
raise TypeError(
f"Cannot automatically create backend for data of type: {type(data)}. "
f"Data must be of type: Union[pa.Table, pa.RecordBatch, Path, str, "
"Sequence[Iterable[Any]], Mapping[str, Sequence[Any]], pl.DataFrame",
)
def _is_iterable(item: Any) -> bool:
try:
iter(item)
except TypeError:
return False
else:
return True
_TableTypeT = TypeVar("_TableTypeT")
class DataTableBackend(ABC, Generic[_TableTypeT]):
data: _TableTypeT
@abstractmethod
def __init__(self, data: _TableTypeT, max_rows: int | None = None) -> None:
pass
@classmethod
@abstractmethod
def from_pydict(
cls, data: Mapping[str, Sequence[Any]], max_rows: int | None = None
) -> "DataTableBackend":
pass
@property
@abstractmethod
def source_data(self) -> _TableTypeT:
"""
Return the source data as an Arrow table
"""
pass
@property
@abstractmethod
def source_row_count(self) -> int:
"""
The number of rows in the source data, before filtering down to max_rows
"""
pass
@property
@abstractmethod
def row_count(self) -> int:
"""
The number of rows in backend's retained data, after filtering down to max_rows
"""
pass
@property
def column_count(self) -> int:
return len(self.columns)
@property
@abstractmethod
def columns(self) -> Sequence[str]:
"""
A list of column labels
"""
pass
@property
@abstractmethod
def column_content_widths(self) -> Sequence[int]:
"""
A list of integers corresponding to the widest utf8 string length
of any data in each column.
"""
pass
@abstractmethod
def get_row_at(self, index: int) -> Sequence[Any]:
pass
@abstractmethod
def get_column_at(self, index: int) -> Sequence[Any]:
pass
@abstractmethod
def get_cell_at(self, row_index: int, column_index: int) -> Any:
pass
@abstractmethod
def append_column(self, label: str, default: Any | None = None) -> int:
"""
Returns column index
"""
@abstractmethod
def append_rows(self, records: Iterable[Iterable[Any]]) -> list[int]:
"""
Returns new row indices
"""
pass
@abstractmethod
def drop_row(self, row_index: int) -> None:
pass
@abstractmethod
def update_cell(self, row_index: int, column_index: int, value: Any) -> None:
"""
Raises IndexError if given bad indices
"""
@abstractmethod
def sort(
self, by: list[tuple[str, Literal["ascending", "descending"]]] | str
) -> None:
"""
by: str sorts table by the data in the column with that name (asc).
by: list[tuple] sorts the table by the named column(s) with the directions
indicated.
"""
class ArrowBackend(DataTableBackend[pa.Table]):
def __init__(self, data: pa.Table, max_rows: int | None = None) -> None:
self._source_data = data
# Arrow allows duplicate field names, but a table's to_pylist() and
# to_pydict() methods will drop duplicate-named fields!
field_names: list[str] = []
renamed = False
for field in data.column_names:
n = 0
while field in field_names:
field = f"{field}{n}"
renamed = True
n += 1
field_names.append(field)
if renamed:
data = data.rename_columns(field_names)
self._source_row_count = data.num_rows
if max_rows is not None and max_rows < self._source_row_count:
self.data = data.slice(offset=0, length=max_rows)
else:
self.data = data
self._console = Console()
self._column_content_widths: list[int] = []
@staticmethod
def _pydict_from_records(
records: Sequence[Iterable[Any]], has_header: bool = False
) -> dict[str, list[Any]]:
headers = (
records[0]
if has_header
else [f"f{i}" for i in range(len(list(records[0])))]
)
data = list(map(list, records[1:] if has_header else records))
pydict = {header: [row[i] for row in data] for i, header in enumerate(headers)}
return pydict
@staticmethod
def _handle_overflow(scalar: pa.Scalar) -> Any | None:
"""
PyArrow may throw an OverflowError when casting arrow types
to python types; in some cases we can catch these and
present a sensible value in the data table; otherwise
we return None.
"""
if pt.is_date32(scalar.type):
if scalar.value > 0: # type: ignore[attr-defined]
return date.max
elif scalar.value <= 0: # type: ignore[attr-defined]
return date.min
elif pt.is_date64(scalar.type):
if scalar.value > 0: # type: ignore[attr-defined]
return date.max
elif scalar.value <= 0: # type: ignore[attr-defined]
return date.min
elif pt.is_timestamp(scalar.type):
if scalar.value > 0: # type: ignore[attr-defined]
return datetime.max
elif scalar.value <= 0: # type: ignore[attr-defined]
return datetime.min
return None
@classmethod
def from_batches(
cls, data: pa.RecordBatch, max_rows: int | None = None
) -> "ArrowBackend":
tbl = pa.Table.from_batches([data])
return cls(tbl, max_rows=max_rows)
@classmethod
def from_parquet(
cls, path: Path | str, max_rows: int | None = None
) -> "ArrowBackend":
tbl = pq.read_table(str(path))
return cls(tbl, max_rows=max_rows)
@classmethod
def from_pydict(
cls, data: Mapping[str, Sequence[Any]], max_rows: int | None = None
) -> "ArrowBackend":
try:
tbl = pa.Table.from_pydict(dict(data))
except (pal.ArrowInvalid, pal.ArrowTypeError):
# one or more fields has mixed types, like int and
# string. Cast all to string for safety
new_data = {
k: [str(val) if val is not None else None for val in v]
for k, v in data.items()
}
tbl = pa.Table.from_pydict(new_data)
return cls(tbl, max_rows=max_rows)
@classmethod
def from_records(
cls,
records: Sequence[Iterable[Any]],
has_header: bool = False,
max_rows: int | None = None,
) -> "ArrowBackend":
pydict = cls._pydict_from_records(records, has_header)
return cls.from_pydict(pydict, max_rows=max_rows)
@property
def source_data(self) -> pa.Table:
return self._source_data
@property
def source_row_count(self) -> int:
return self._source_row_count
@property
def row_count(self) -> int:
return self.data.num_rows
@property
def column_count(self) -> int:
return self.data.num_columns
@property
def columns(self) -> Sequence[str]:
return self.data.column_names
@property
def column_content_widths(self) -> list[int]:
if not self._column_content_widths:
measurements = [self._measure(arr) for arr in self.data.columns]
# pc.max returns None for each column without rows; we need to return 0
# instead.
self._column_content_widths = [cw or 0 for cw in measurements]
return self._column_content_widths
def get_row_at(self, index: int) -> Sequence[Any]:
try:
row: Dict[str, Any] = self.data.slice(index, length=1).to_pylist()[0]
except OverflowError:
return [
self._handle_overflow(self.data[i][index])
for i in range(len(self.columns))
]
else:
return list(row.values())
def get_column_at(self, column_index: int) -> list[Any]:
try:
values = self.data[column_index].to_pylist()
except OverflowError:
# TODO: consider registering a scalar UDF here for parallel processing
return [self._handle_overflow(scalar) for scalar in self.data[column_index]]
else:
return values
def get_cell_at(self, row_index: int, column_index: int) -> Any:
scalar = self.data[column_index][row_index]
try:
value = scalar.as_py()
except OverflowError:
value = self._handle_overflow(scalar)
return value
def append_column(self, label: str, default: Any | None = None) -> int:
"""
Returns column index
"""
if default is None:
arr: pa.Array = pa.nulls(self.row_count)
else:
arr = pa.nulls(self.row_count, type=pa.string())
arr = arr.fill_null(str(default))
self.data = self.data.append_column(label, arr)
if self._column_content_widths:
self._column_content_widths.append(measure_width(default, self._console))
return self.data.num_columns - 1
def append_rows(self, records: Iterable[Iterable[Any]]) -> list[int]:
rows = list(records)
indices = list(range(self.row_count, self.row_count + len(rows)))
records_with_headers = [self.data.column_names, *rows]
pydict = self._pydict_from_records(records_with_headers, has_header=True)
old_rows = self.data.to_batches()
new_rows = pa.RecordBatch.from_pydict(
pydict,
schema=self.data.schema,
)
self.data = pa.Table.from_batches([*old_rows, new_rows])
self._reset_content_widths()
return indices
def drop_row(self, row_index: int) -> None:
if row_index < 0 or row_index >= self.row_count:
raise IndexError(f"Can't drop row {row_index} of {self.row_count}")
above = self.data.slice(0, row_index).to_batches()
below = self.data.slice(row_index + 1).to_batches()
self.data = pa.Table.from_batches([*above, *below])
self._reset_content_widths()
def update_cell(self, row_index: int, column_index: int, value: Any) -> None:
column = self.data.column(column_index)
pycolumn = self.get_column_at(column_index=column_index)
pycolumn[row_index] = value
new_type = pa.string() if pt.is_null(column.type) else column.type
self.data = self.data.set_column(
column_index,
self.data.column_names[column_index],
pa.array(pycolumn, type=new_type),
)
if self._column_content_widths:
self._column_content_widths[column_index] = max(
measure_width(value, self._console),
self._column_content_widths[column_index],
)
def sort(
self, by: list[tuple[str, Literal["ascending", "descending"]]] | str
) -> None:
"""
by: str sorts table by the data in the column with that name (asc).
by: list[tuple] sorts the table by the named column(s) with the directions
indicated.
"""
self.data = self.data.sort_by(by)
def _reset_content_widths(self) -> None:
self._column_content_widths = []
def _measure(self, arr: pa._PandasConvertible) -> int:
# with some types we can measure the width more efficiently
if pt.is_boolean(arr.type):
return 7
elif pt.is_null(arr.type):
return 0
elif (
pt.is_integer(arr.type)
or pt.is_floating(arr.type)
or pt.is_decimal(arr.type)
):
try:
col_max = pc.max(arr.fill_null(0)).as_py()
except OverflowError:
col_max = 9223372036854775807
try:
col_min = pc.min(arr.fill_null(0)).as_py()
except OverflowError:
col_min = -9223372036854775807
return max([measure_width(el, self._console) for el in [col_max, col_min]])
elif pt.is_temporal(arr.type):
try:
value = arr.drop_null()[0].as_py()
except OverflowError:
return 26 # need space for the infinity sign and a space
except IndexError:
return 24
else:
# valid temporal types all have the same width for their type
return measure_width(value, self._console)
# for everything else, we need to compute it
# First, cast the data to strings
try:
arr = arr.cast(
pa.string(),
safe=False,
)
except (pal.ArrowNotImplementedError, pal.ArrowInvalid):
# some types can't be casted to strings natively by arrow, but they
# can be casted to strings by python. The arrow way is faster, but
# if it fails, register a python udf and try again
def py_str(_ctx: Any, arr: pa.Array) -> str | pa.Array | pa.ChunkedArray:
return pa.array([str(el) for el in arr], type=pa.string())
udf_name = f"tfdt_pystr_{arr.type}"
with suppress(pal.ArrowKeyError): # already registered
pc.register_scalar_function(
py_str,
function_name=udf_name,
function_doc={"summary": "str", "description": "built-in str"},
in_types={"arr": arr.type},
out_type=pa.string(),
)
arr = pc.call_function(udf_name, [arr])
# next, try to measure the UTF-encoded string length of each cell,
# then take the max
try:
width: int = pc.max(pc.utf8_length(arr.fill_null("")).fill_null(0)).as_py()
except OverflowError:
width = 10
return width
if _HAS_POLARS:
class PolarsBackend(DataTableBackend[pl.DataFrame]):
@classmethod
def from_file_path(
cls, path: Path, max_rows: int | None = None, has_header: bool = True
) -> "PolarsBackend":
if path.suffix in [".arrow", ".feather"]:
tbl = pl.read_ipc(path)
elif path.suffix == ".arrows":
tbl = pl.read_ipc_stream(path)
elif path.suffix == ".json":
tbl = pl.read_json(path)
elif path.suffix == ".csv":
tbl = pl.read_csv(path, has_header=has_header)
else:
raise TypeError(
f"Dont know how to load file type {path.suffix} for {path}"
)
return cls(tbl, max_rows=max_rows)
@classmethod
def from_pydict(
cls, pydict: Mapping[str, Sequence[Any]], max_rows: int | None = None
) -> "PolarsBackend":
return cls(pl.from_dict(pydict), max_rows=max_rows)
@classmethod
def from_dataframe(
cls, frame: pl.DataFrame, max_rows: int | None = None
) -> "PolarsBackend":
return cls(frame, max_rows=max_rows)
def __init__(self, data: pl.DataFrame, max_rows: int | None = None) -> None:
self._source_data = data
# Polars requires unique column names, so rename any duplicates the
# same way the ArrowBackend does.
field_names: list[str] = []
for field in data.columns:
n = 0
while field in field_names:
field = f"{field}{n}"
n += 1
field_names.append(field)
data.columns = field_names
self._source_row_count = len(data)
if max_rows is not None and max_rows < self._source_row_count:
self.data = data.slice(offset=0, length=max_rows)
else:
self.data = data
self._console = Console()
self._column_content_widths: list[int] = []
@property
def source_data(self) -> pl.DataFrame:
return self._source_data
@property
def source_row_count(self) -> int:
return self._source_row_count
@property
def row_count(self) -> int:
return len(self.data)
@property
def column_count(self) -> int:
return len(self.data.columns)
@property
def columns(self) -> Sequence[str]:
return self.data.columns
def get_row_at(self, index: int) -> Sequence[Any]:
if index < 0 or index >= len(self.data):
raise IndexError(
f"Cannot get row={index} in table with {len(self.data)} rows "
f"and {len(self.data.columns)} cols"
)
return list(self.data.slice(index, length=1).to_dicts()[0].values())
def get_column_at(self, column_index: int) -> Sequence[Any]:
if column_index < 0 or column_index >= len(self.data.columns):
raise IndexError(
f"Cannot get column={column_index} in table with {len(self.data)} "
f"rows and {len(self.data.columns)} cols."
)
return list(self.data.to_series(column_index))
def get_cell_at(self, row_index: int, column_index: int) -> Any:
if (
row_index >= len(self.data)
or row_index < 0
or column_index < 0
or column_index >= len(self.data.columns)
):
raise IndexError(
f"Cannot get cell at row={row_index} col={column_index} in table "
f"with {len(self.data)} rows and {len(self.data.columns)} cols"
)
return self.data.to_series(column_index)[row_index]
def drop_row(self, row_index: int) -> None:
if row_index < 0 or row_index >= self.row_count:
raise IndexError(f"Can't drop row {row_index} of {self.row_count}")
above = self.data.slice(0, row_index)
below = self.data.slice(row_index + 1)
self.data = pl.concat([above, below])
self._reset_content_widths()
def append_rows(self, records: Iterable[Iterable[Any]]) -> list[int]:
rows_to_add = pl.from_dicts(
[dict(zip(self.data.columns, row)) for row in records]
)
indices = list(range(self.row_count, self.row_count + len(rows_to_add)))
self.data = pl.concat([self.data, rows_to_add])
self._reset_content_widths()
return indices
def append_column(self, label: str, default: Any | None = None) -> int:
"""
Returns column index
"""
self.data = self.data.with_columns(
pl.Series([default])
.extend_constant(default, self.row_count - 1)
.alias(label)
)
if self._column_content_widths:
self._column_content_widths.append(
measure_width(default, self._console)
)
return len(self.data.columns) - 1
def _reset_content_widths(self) -> None:
self._column_content_widths = []
def update_cell(self, row_index: int, column_index: int, value: Any) -> None:
if row_index >= len(self.data) or column_index >= len(self.data.columns):
raise IndexError(
f"Cannot update cell at row={row_index} col={column_index} in "
f"table with {len(self.data)} rows and "
f"{len(self.data.columns)} cols"
)
col_name = self.data.columns[column_index]
self.data = self.data.with_columns(
self.data.to_series(column_index)
.scatter(row_index, value)
.alias(col_name)
)
if self._column_content_widths:
self._column_content_widths[column_index] = max(
measure_width(value, self._console),
self._column_content_widths[column_index],
)
@property
def column_content_widths(self) -> list[int]:
if not self._column_content_widths:
measurements = [
self._measure(self.data[arr]) for arr in self.data.columns
]
# Series.max() returns None for each column without rows; we need to
# return 0 instead.
self._column_content_widths = [cw or 0 for cw in measurements]
return self._column_content_widths
def _measure(self, arr: pl.Series) -> int:
# with some types we can measure the width more efficiently
dtype = arr.dtype
if dtype == pld.Categorical():
return self._measure(arr.cat.get_categories())
if dtype.is_decimal() or dtype.is_float() or dtype.is_integer():
col_max = arr.max()
col_min = arr.min()
return max(
[measure_width(el, self._console) for el in [col_max, col_min]]
)
if dtype.is_temporal():
try:
value = arr.drop_nulls()[0]
except IndexError:
return 0
else:
return measure_width(value, self._console)
if dtype.is_(pld.Boolean()):
return 7
# for everything else, we need to compute it
arr = arr.cast(
pl.Utf8(),
strict=False,
)
width = arr.fill_null("<null>").str.len_chars().max()
assert isinstance(width, int)
return width
def sort(
self, by: list[tuple[str, Literal["ascending", "descending"]]] | str
) -> None:
"""
by: str sorts table by the data in the column with that name (asc).
by: list[tuple] sorts the table by the named column(s) with the directions
indicated.
"""
if isinstance(by, str):
cols = [by]
typs = [False]
else:
cols = [x for x, _ in by]
typs = [x == "descending" for _, x in by]
self.data = self.data.sort(cols, descending=typs)

src/textual_fastdatatable/column.py

@@ -0,0 +1,47 @@
from __future__ import annotations
import re
from dataclasses import dataclass
from rich.text import Text
CELL_X_PADDING = 2
SNAKE_ID_PROG = re.compile(r"(\b|_)id\b", flags=re.IGNORECASE)
CAMEL_ID_PROG = re.compile(r"[a-z]I[dD]\b")
@dataclass
class Column:
"""Metadata for a column in the DataTable."""
label: Text
width: int = 0
content_width: int = 0
auto_width: bool = False
max_content_width: int | None = None
def __post_init__(self) -> None:
self._is_id: bool | None = None
@property
def render_width(self) -> int:
"""Width in cells, required to render a column."""
# +2 is to account for space padding either side of the cell
if self.auto_width and self.max_content_width is not None:
return (
min(max(len(self.label), self.content_width), self.max_content_width)
+ CELL_X_PADDING
)
elif self.auto_width:
return max(len(self.label), self.content_width) + CELL_X_PADDING
else:
return self.width + CELL_X_PADDING
@property
def is_id(self) -> bool:
if self._is_id is None:
snake_id = SNAKE_ID_PROG.search(str(self.label)) is not None
camel_id = CAMEL_ID_PROG.search(str(self.label)) is not None
self._is_id = snake_id or camel_id
return self._is_id

src/textual_fastdatatable/data_table.py
File diff suppressed because it is too large.

src/textual_fastdatatable/formatter.py

@@ -0,0 +1,101 @@
from __future__ import annotations
from datetime import date, datetime, time, timedelta
from decimal import Decimal
from typing import cast
from rich.align import Align
from rich.console import Console, RenderableType
from rich.errors import MarkupError
from rich.markup import escape
from rich.protocol import is_renderable
from rich.text import Text
from textual_fastdatatable.column import Column
def cell_formatter(
obj: object, null_rep: Text, col: Column | None = None, render_markup: bool = True
) -> RenderableType:
"""Convert a cell into a Rich renderable for display.
For correct formatting, clients should call `locale.setlocale()` first.
Args:
obj: Data for a cell.
null_rep: Rich Text to render for null (None) values.
col: Column that the cell came from (used to decide whether an integer is an ID and should be rendered without separators).
render_markup: If True, render string values as Rich console markup.
Returns:
A renderable to be displayed which represents the data.
"""
if obj is None:
return Align(null_rep, align="center")
elif isinstance(obj, str) and render_markup:
try:
rich_text: Text | str = Text.from_markup(obj)
except MarkupError:
rich_text = escape(obj)
return rich_text
elif isinstance(obj, str):
return escape(obj)
elif isinstance(obj, bool):
return Align(
f"[dim]{'' if obj else 'X'}[/] {obj}{' ' if obj else ''}",
style="bold" if obj else "",
align="right",
)
elif isinstance(obj, (float, Decimal)):
return Align(f"{obj:n}", align="right")
elif isinstance(obj, int):
if col is not None and col.is_id:
# no separators in ID fields
return Align(str(obj), align="right")
else:
return Align(f"{obj:n}", align="right")
elif isinstance(obj, (datetime, time)):
def _fmt_datetime(obj: datetime | time) -> str:
return obj.isoformat(timespec="milliseconds").replace("+00:00", "Z")
if obj in (datetime.max, datetime.min):
return Align(
(
f"[bold]{'' if obj == datetime.max else '-∞ '}[/]"
f"[dim]{_fmt_datetime(obj)}[/]"
),
align="right",
)
return Align(_fmt_datetime(obj), align="right")
elif isinstance(obj, date):
if obj in (date.max, date.min):
return Align(
(
f"[bold]{'' if obj == date.max else '-∞ '}[/]"
f"[dim]{obj.isoformat()}[/]"
),
align="right",
)
return Align(obj.isoformat(), align="right")
elif isinstance(obj, timedelta):
return Align(str(obj), align="right")
elif not is_renderable(obj):
return str(obj)
else:
return cast(RenderableType, obj)
def measure_width(obj: object, console: Console) -> int:
renderable = cell_formatter(obj, null_rep=Text(""))
return console.measure(renderable).maximum
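A minimal sketch of the formatter in isolation (note that numeric widths depend on the active locale):
from rich.console import Console
from rich.text import Text
from textual_fastdatatable.formatter import cell_formatter, measure_width

console = Console()
cell_formatter(None, null_rep=Text("∅"))          # centered null placeholder
cell_formatter("[bold]hi[/]", null_rep=Text(""))  # parsed as Rich markup
print(measure_width("[bold]hi[/]", console))      # 2: markup tags take no width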


166
stubs/pyarrow/__init__.pyi Normal file

@ -0,0 +1,166 @@
from __future__ import annotations
from typing import Any, Iterable, Iterator, Literal, Mapping, Sequence, Type, TypeVar
import pandas as pd
from .compute import CastOptions
class DataType: ...
class Date32Type(DataType): ...
class Date64Type(DataType): ...
class TimestampType(DataType): ...
def string() -> DataType: ...
def null() -> DataType: ...
def bool_() -> DataType: ...
def int8() -> DataType: ...
def int16() -> DataType: ...
def int32() -> DataType: ...
def int64() -> DataType: ...
def uint8() -> DataType: ...
def uint16() -> DataType: ...
def uint32() -> DataType: ...
def uint64() -> DataType: ...
def float16() -> DataType: ...
def float32() -> DataType: ...
def float64() -> DataType: ...
def date32() -> DataType: ...
def date64() -> DataType: ...
def binary(length: int = -1) -> DataType: ...
def large_binary() -> DataType: ...
def large_string() -> DataType: ...
def month_day_nano_interval() -> DataType: ...
def time32(unit: Literal["s", "ms", "us", "ns"]) -> DataType: ...
def time64(unit: Literal["s", "ms", "us", "ns"]) -> DataType: ...
def timestamp(
unit: Literal["s", "ms", "us", "ns"], tz: str | None = None
) -> DataType: ...
def duration(unit: Literal["s", "ms", "us", "ns"]) -> DataType: ...
class MemoryPool: ...
class Schema: ...
class Field: ...
class NativeFile: ...
class MonthDayNano: ...
class Scalar:
def as_py(self) -> Any: ...
@property
def type(self) -> DataType: ...
A = TypeVar("A", bound="_PandasConvertible")
class _PandasConvertible:
@property
def type(self) -> DataType: ... # noqa: A003
def cast(
self: A,
target_type: DataType | None = None,
safe: bool = True,
options: CastOptions | None = None,
) -> A: ...
def __getitem__(self, index: int) -> Scalar: ...
def __iter__(self) -> Any: ...
def to_pylist(self) -> list[Any]: ...
def fill_null(self: A, fill_value: Any) -> A: ...
def drop_null(self: A) -> A: ...
class Array(_PandasConvertible): ...
class ChunkedArray(_PandasConvertible): ...
class StructArray(Array):
def flatten(self, memory_pool: MemoryPool | None = None) -> list[Array]: ...
T = TypeVar("T", bound="_Tabular")
class _Tabular:
@classmethod
def from_arrays(
cls: Type[T],
arrays: list[_PandasConvertible],
names: list[str] | None = None,
schema: Schema | None = None,
metadata: Mapping | None = None,
) -> T: ...
@classmethod
def from_pydict(
cls: Type[T],
mapping: Mapping,
schema: Schema | None = None,
metadata: Mapping | None = None,
) -> T: ...
def __getitem__(self, index: int) -> _PandasConvertible: ...
def __len__(self) -> int: ...
@property
def column_names(self) -> list[str]: ...
@property
def columns(self) -> list[_PandasConvertible]: ...
@property
def num_rows(self) -> int: ...
@property
def num_columns(self) -> int: ...
@property
def schema(self) -> Schema: ...
def append_column(
self: T, field_: str | Field, column: Array | ChunkedArray
) -> T: ...
def column(self, i: int | str) -> _PandasConvertible: ...
def equals(self: T, other: T, check_metadata: bool = False) -> bool: ...
def itercolumns(self) -> Iterator[_PandasConvertible]: ...
def rename_columns(self: T, names: list[str]) -> T: ...
def select(self: T, columns: Sequence[str | int]) -> T: ...
def set_column(
self: T, i: int, field_: str | Field, column: Array | ChunkedArray
) -> T: ...
def slice( # noqa: A003
self: T,
offset: int = 0,
length: int | None = None,
) -> T: ...
def sort_by(
self: T,
sorting: str | list[tuple[str, Literal["ascending", "descending"]]],
**kwargs: Any,
) -> T: ...
def to_pylist(self) -> list[dict[str, Any]]: ...
class RecordBatch(_Tabular): ...
class Table(_Tabular):
@classmethod
def from_batches(
cls,
batches: Iterable[RecordBatch],
schema: Schema | None = None,
) -> "Table": ...
def to_batches(self) -> list[RecordBatch]: ...
def scalar(value: Any, type: DataType) -> Scalar: ... # noqa: A002
def array(
obj: Iterable,
type: DataType | None = None, # noqa: A002
mask: Array | None = None,
size: int | None = None,
from_pandas: bool | None = None,
safe: bool = True,
memory_pool: MemoryPool | None = None,
) -> Array | ChunkedArray: ...
def concat_arrays(
arrays: Iterable[Array], memory_pool: MemoryPool | None = None
) -> Array: ...
def nulls(
size: int,
type: DataType | None = None, # noqa: A002
memory_pool: MemoryPool | None = None,
) -> Array: ...
def table(
data: pd.DataFrame
| Mapping[str, _PandasConvertible | list]
| list[_PandasConvertible],
names: list[str] | None = None,
schema: Schema | None = None,
metadata: Mapping | None = None,
nthreads: int | None = None,
) -> Table: ...
def set_timezone_db_path(path: str) -> None: ...

64
stubs/pyarrow/compute.pyi Normal file

@ -0,0 +1,64 @@
from __future__ import annotations
from datetime import datetime
from typing import Any, Callable, Literal
from . import DataType, MemoryPool, Scalar, _PandasConvertible
class Expression: ...
class ScalarAggregateOptions: ...
class CastOptions:
def __init__(
self,
target_type: DataType | None = None,
allow_int_overflow: bool | None = None,
allow_time_truncate: bool | None = None,
allow_time_overflow: bool | None = None,
allow_decimal_truncate: bool | None = None,
allow_float_truncate: bool | None = None,
allow_invalid_utf8: bool | None = None,
) -> None: ...
def max( # noqa: A001
array: _PandasConvertible,
/,
*,
skip_nulls: bool = True,
min_count: int = 1,
options: ScalarAggregateOptions | None = None,
memory_pool: MemoryPool | None = None,
) -> Scalar: ...
def min( # noqa: A001
array: _PandasConvertible,
/,
*,
skip_nulls: bool = True,
min_count: int = 1,
options: ScalarAggregateOptions | None = None,
memory_pool: MemoryPool | None = None,
) -> Scalar: ...
def utf8_length(
strings: _PandasConvertible, /, *, memory_pool: MemoryPool | None = None
) -> _PandasConvertible: ...
def register_scalar_function(
func: Callable,
function_name: str,
function_doc: dict[Literal["summary", "description"], str],
in_types: dict[str, DataType],
out_type: DataType,
func_registry: Any | None = None,
) -> None: ...
def call_function(
function_name: str, target: list[_PandasConvertible]
) -> _PandasConvertible: ...
def assume_timezone(
timestamps: _PandasConvertible | Scalar | datetime,
/,
timezone: str,
*,
ambiguous: Literal["raise", "earliest", "latest"] = "raise",
nonexistent: Literal["raise", "earliest", "latest"] = "raise",
options: Any | None = None,
memory_pool: MemoryPool | None = None,
) -> _PandasConvertible: ...
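A brief sketch of the stubbed compute functions against the real pyarrow.compute module:
import pyarrow as pa
import pyarrow.compute as pc

arr = pa.array([4, 1, 3])
print(pc.min(arr).as_py(), pc.max(arr).as_py())             # 1 4
print(pc.utf8_length(pa.array(["ab", "cde"])).to_pylist())  # [2, 3]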


@ -0,0 +1 @@
class Partitioning: ...

1
stubs/pyarrow/fs.pyi Normal file

@ -0,0 +1 @@
class FileSystem: ...

32
stubs/pyarrow/lib.pyi Normal file

@ -0,0 +1,32 @@
from . import Date32Type, Date64Type, Scalar, TimestampType
class ArrowException(Exception): ...
class ArrowInvalid(ValueError, ArrowException): ...
class ArrowMemoryError(MemoryError, ArrowException): ...
class ArrowKeyError(KeyError, Exception): ...
class ArrowTypeError(TypeError, Exception): ...
class ArrowNotImplementedError(NotImplementedError, ArrowException): ...
class ArrowCapacityError(ArrowException): ...
class ArrowIndexError(IndexError, ArrowException): ...
class ArrowSerializationError(ArrowException): ...
class ArrowCancelled(ArrowException): ...
ArrowIOError = IOError
class Date32Scalar(Scalar):
@property
def type(self) -> Date32Type: ...
@property
def value(self) -> int: ...
class Date64Scalar(Scalar):
@property
def type(self) -> Date64Type: ...
@property
def value(self) -> int: ...
class TimestampScalar(Scalar):
@property
def type(self) -> TimestampType: ...
@property
def value(self) -> int: ...
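A minimal sketch showing one of the stubbed exception types raised by real pyarrow (the failing cast is illustrative):
import pyarrow as pa
from pyarrow.lib import ArrowInvalid

try:
    pa.array(["not a number"]).cast(pa.int64())
except ArrowInvalid as err:  # subclasses ValueError, per the stub above
    print(err)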

60
stubs/pyarrow/parquet.pyi Normal file

@ -0,0 +1,60 @@
from __future__ import annotations
from typing import Any, BinaryIO, Literal
from . import NativeFile, Schema, Table
from .compute import Expression
from .dataset import Partitioning
from .fs import FileSystem
class FileMetaData: ...
def read_table(
source: str | NativeFile | BinaryIO,
*,
columns: list | None = None,
use_threads: bool = True,
metadata: FileMetaData | None = None,
schema: Schema | None = None,
use_pandas_metadata: bool = False,
read_dictionary: list | None = None,
memory_map: bool = False,
buffer_size: int = 0,
partitioning: Partitioning | str | list[str] = "hive",
filesystem: FileSystem | None = None,
filters: Expression | list[tuple] | list[list[tuple]] | None = None,
use_legacy_dataset: bool = False,
ignore_prefixes: list | None = None,
pre_buffer: bool = True,
coerce_int96_timestamp_unit: str | None = None,
decryption_properties: Any | None = None,
thrift_string_size_limit: int | None = None,
thrift_container_size_limit: int | None = None,
) -> Table: ...
def write_table(
table: Table,
where: str | NativeFile,
row_group_size: int | None = None,
version: Literal["1.0", "2.4", "2.6"] = "2.6",
use_dictionary: bool | list = True,
compression: Literal["none", "snappy", "gzip", "brotli", "lz4", "zstd"]
| dict[str, Literal["none", "snappy", "gzip", "brotli", "lz4", "zstd"]] = "snappy",
write_statistics: bool | list = True,
use_deprecated_int96_timestamps: bool | None = None,
coerce_timestamps: str | None = None,
allow_truncated_timestamps: bool = False,
data_page_size: int | None = None,
flavor: Literal["spark"] | None = None,
filesystem: FileSystem | None = None,
compression_level: int | dict | None = None,
use_byte_stream_split: bool | list = False,
column_encoding: str | dict | None = None,
data_page_version: Literal["1.0", "2.0"] = "1.0",
use_compliant_nested_type: bool = True,
encryption_properties: Any | None = None,
write_batch_size: int | None = None,
dictionary_pagesize_limit: int | None = None,
store_schema: bool = True,
write_page_index: bool = False,
**kwargs: Any,
) -> None: ...
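A minimal round-trip sketch of the two stubbed functions against real pyarrow (the file name is illustrative):
import pyarrow as pa
import pyarrow.parquet as pq

tbl = pa.table({"x": [1, 2, 3]})
pq.write_table(tbl, "demo.parquet")
assert pq.read_table("demo.parquet").equals(tbl)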

27
stubs/pyarrow/types.pyi Normal file

@ -0,0 +1,27 @@
from __future__ import annotations
from . import DataType, Date32Type, Date64Type, TimestampType
def is_null(t: DataType) -> bool: ...
def is_struct(t: DataType) -> bool: ...
def is_boolean(t: DataType) -> bool: ...
def is_integer(t: DataType) -> bool: ...
def is_floating(t: DataType) -> bool: ...
def is_decimal(t: DataType) -> bool: ...
def is_temporal(t: DataType) -> bool: ...
def is_date(t: DataType) -> bool: ...
def is_date32(t: DataType) -> bool:
    return isinstance(t, Date32Type)
def is_date64(t: DataType) -> bool:
    return isinstance(t, Date64Type)
def is_time(t: DataType) -> bool: ...
def is_timestamp(t: DataType) -> bool:
    return isinstance(t, TimestampType)
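A quick sketch of these predicates against real pyarrow types:
import pyarrow as pa
import pyarrow.types as pat

assert pat.is_timestamp(pa.timestamp("s"))
assert pat.is_date32(pa.date32())
assert not pat.is_integer(pa.string())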

34
tests/conftest.py Normal file

@ -0,0 +1,34 @@
from __future__ import annotations
from typing import Sequence
import pytest
from textual_fastdatatable.backend import ArrowBackend, DataTableBackend, PolarsBackend
@pytest.fixture
def pydict() -> dict[str, Sequence[str | int]]:
return {
"first column": [1, 2, 3, 4, 5],
"two": ["a", "b", "c", "d", "asdfasdf"],
"three": ["foo", "bar", "baz", "qux", "foofoo"],
}
@pytest.fixture
def records(pydict: dict[str, Sequence[str | int]]) -> list[tuple[str | int, ...]]:
header = tuple(pydict.keys())
cols = list(pydict.values())
num_rows = len(cols[0])
data = [tuple([col[i] for col in cols]) for i in range(num_rows)]
return [header, *data]
@pytest.fixture(params=[ArrowBackend, PolarsBackend])
def backend(
    request: pytest.FixtureRequest, pydict: dict[str, Sequence[str | int]]
) -> DataTableBackend:
backend_cls = request.param
assert issubclass(backend_cls, (ArrowBackend, PolarsBackend))
backend: ArrowBackend | PolarsBackend = backend_cls.from_pydict(pydict)
return backend

7 binary files not shown.


@ -0,0 +1,21 @@
MIT License
Copyright (c) 2021 Will McGugan
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long


@ -0,0 +1,185 @@
from textual.app import App
from textual.containers import Container, Horizontal, ScrollableContainer, Vertical
from textual.screen import Screen
from textual.widgets import Header, Label
from textual_fastdatatable import ArrowBackend, DataTable
class LabeledBox(Container):
DEFAULT_CSS = """
LabeledBox {
layers: base_ top_;
width: 100%;
height: 100%;
}
LabeledBox > Container {
layer: base_;
border: round $primary;
width: 100%;
height: 100%;
layout: vertical;
}
LabeledBox > Label {
layer: top_;
offset-x: 2;
}
"""
def __init__(self, title, *args, **kwargs):
self.__label = Label(title)
super().__init__(self.__label, Container(*args, **kwargs))
@property
def label(self):
return self.__label
class StatusTable(DataTable):
def __init__(self) -> None:
backend = ArrowBackend.from_pydict(
{
"Foo": ["ABCDEFGH"] * 50,
"Bar": ["0123456789"] * 50,
"Baz": ["IJKLMNOPQRSTUVWXYZ"] * 50,
}
)
super().__init__(backend=backend)
self.cursor_type = "row"
self.show_cursor = False
class Status(LabeledBox):
DEFAULT_CSS = """
Status {
width: auto;
}
Status Container {
width: auto;
}
Status StatusTable {
width: auto;
height: 100%;
margin-top: 1;
scrollbar-gutter: stable;
overflow-x: hidden;
}
"""
def __init__(self, name: str):
self.__name = name
self.__table = StatusTable()
super().__init__(f" {self.__name} ", self.__table)
@property
def name(self) -> str:
return self.__name
@property
def table(self) -> StatusTable:
return self.__table
class Rendering(LabeledBox):
DEFAULT_CSS = """
#issue-info {
height: auto;
border-bottom: dashed #632CA6;
}
#statuses-box {
height: 1fr;
width: auto;
}
"""
def __init__(self):
self.__info = Label("test")
super().__init__(
"",
ScrollableContainer(
Horizontal(self.__info, id="issue-info"),
Horizontal(*[Status(str(i)) for i in range(4)], id="statuses-box"),
id="issues-box",
),
)
@property
def info(self) -> Label:
return self.__info
class Sidebar(LabeledBox):
DEFAULT_CSS = """
#sidebar-status {
height: auto;
border-bottom: dashed #632CA6;
}
#sidebar-options {
height: 1fr;
}
"""
def __init__(self):
self.__status = Label("ok")
self.__options = Vertical()
super().__init__(
"",
Container(self.__status, id="sidebar-status"),
Container(self.__options, id="sidebar-options"),
)
@property
def status(self) -> Label:
return self.__status
@property
def options(self) -> Vertical:
return self.__options
class MyScreen(Screen):
DEFAULT_CSS = """
#main-content {
layout: grid;
grid-size: 2;
grid-columns: 1fr 5fr;
grid-rows: 1fr;
}
#main-content-sidebar {
height: 100%;
}
#main-content-rendering {
height: 100%;
}
"""
def compose(self):
yield Header()
yield Container(
Container(Sidebar(), id="main-content-sidebar"),
Container(Rendering(), id="main-content-rendering"),
id="main-content",
)
class MyApp(App):
async def on_mount(self):
self.install_screen(MyScreen(), "myscreen")
await self.push_screen("myscreen")
if __name__ == "__main__":
app = MyApp()
app.run()


@ -0,0 +1,26 @@
from textual.app import App, ComposeResult
from textual_fastdatatable import ArrowBackend, DataTable
ROWS = [
("lane", "swimmer", "country", "time"),
(4, "Joseph Schooling", "Singapore", 50.39),
(2, "Michael Phelps", "United States", 51.14),
(5, "Chad le Clos", "South Africa", 51.14),
(6, "László Cseh", "Hungary", 51.14),
(3, "Li Zhuhao", "China", 51.26),
(8, "Mehdy Metella", "France", 51.58),
(7, "Tom Shields", "United States", 51.73),
(1, "Aleksandr Sadovnikov", "Russia", 51.84),
(10, "Darren Burns", "Scotland", 51.84),
]
class TableApp(App):
def compose(self) -> ComposeResult:
backend = ArrowBackend.from_records(ROWS, has_header=True)
yield DataTable(backend=backend)
app = TableApp()
if __name__ == "__main__":
app.run()


@ -0,0 +1,36 @@
from textual.app import App, ComposeResult
from textual.binding import Binding
from textual_fastdatatable import ArrowBackend, DataTable
MOVIES = [
"Severance",
"Foundation",
"Dark",
"The Boys",
"The Last of Us",
"Lost in Space",
"Altered Carbon",
]
class AddColumn(App):
BINDINGS = [
Binding(key="c", action="add_column", description="Add Column"),
]
def compose(self) -> ComposeResult:
backend = ArrowBackend.from_pydict({"Movies": MOVIES})
table = DataTable(backend=backend)
column_idx = table.add_column("No Default")
table.add_column("With Default", default="ABC")
table.add_column("Long Default", default="01234567890123456789")
# Ensure we can update a cell
table.update_cell(2, column_idx, "Hello!")
yield table
app = AddColumn()
if __name__ == "__main__":
app.run()


@ -0,0 +1,24 @@
from rich.panel import Panel
from rich.text import Text
from textual.app import App
from textual_fastdatatable import DataTable
class AutoHeightRowsApp(App[None]):
def compose(self):
table = DataTable()
self.column = table.add_column("N")
table.add_column("Column", width=10)
table.add_row(3, "hey there", height=None)
table.add_row(1, Text("hey there"), height=None)
table.add_row(5, Text("long string", overflow="fold"), height=None)
table.add_row(2, Panel.fit("Hello\nworld"), height=None)
table.add_row(4, "1\n2\n3\n4\n5\n6\n7", height=None)
yield table
def key_s(self):
self.query_one(DataTable).sort(self.column)
if __name__ == "__main__":
AutoHeightRowsApp().run()


@ -0,0 +1,35 @@
import csv
import io
from textual.app import App, ComposeResult
from textual_fastdatatable import ArrowBackend, DataTable
CSV = """lane,swimmer,country,time
4,Joseph Schooling,Singapore,50.39
2,Michael Phelps,United States,51.14
5,Chad le Clos,South Africa,51.14
6,László Cseh,Hungary,51.14
3,Li Zhuhao,China,51.26
8,Mehdy Metella,France,51.58
7,Tom Shields,United States,51.73
1,Aleksandr Sadovnikov,Russia,51.84"""
class TableApp(App):
def compose(self) -> ComposeResult:
rows = csv.reader(io.StringIO(CSV))
labels = next(rows)
data = [row for row in rows]
backend = ArrowBackend.from_pydict(
{label: [row[i] for row in data] for i, label in enumerate(labels)}
)
table = DataTable(
backend=backend, cursor_type="column", fixed_columns=1, fixed_rows=1
)
table.focus()
yield table
if __name__ == "__main__":
app = TableApp()
app.run()


@ -0,0 +1,35 @@
import csv
import io
from textual.app import App, ComposeResult
from textual_fastdatatable import ArrowBackend, DataTable
CSV = """lane,swimmer,country,time
4,Joseph Schooling,Singapore,50.39
2,Michael Phelps,United States,51.14
5,Chad le Clos,South Africa,51.14
6,László Cseh,Hungary,51.14
3,Li Zhuhao,China,51.26
8,Mehdy Metella,France,51.58
7,Tom Shields,United States,51.73
1,Aleksandr Sadovnikov,Russia,51.84"""
class TableApp(App):
def compose(self) -> ComposeResult:
rows = csv.reader(io.StringIO(CSV))
labels = next(rows)
data = [row for row in rows]
backend = ArrowBackend.from_pydict(
{label: [row[i] for row in data] for i, label in enumerate(labels)}
)
table = DataTable(
backend=backend, cursor_type="range", max_column_content_width=8
)
table.focus()
yield table
if __name__ == "__main__":
app = TableApp()
app.run()


@ -0,0 +1,26 @@
from textual.app import App, ComposeResult
from textual_fastdatatable import ArrowBackend, DataTable
ROWS = [
("lane", "swimmer", "country", "time"),
(4, "[Joseph Schooling]", "Singapore", 50.39),
(2, "[red]Michael Phelps[/]", "United States", 51.14),
(5, "[bold]Chad le Clos[/]", "South Africa", 51.14),
(6, "László Cseh", "Hungary", 51.14),
(3, "Li Zhuhao", "China", 51.26),
(8, "Mehdy Metella", "France", 51.58),
(7, "Tom Shields", "United States", 51.73),
(1, "Aleksandr Sadovnikov", "Russia", 51.84),
(10, "Darren Burns", "Scotland", 51.84),
]
class TableApp(App):
def compose(self) -> ComposeResult:
backend = ArrowBackend.from_records(ROWS, has_header=True)
yield DataTable(backend=backend, render_markup=False)
app = TableApp()
if __name__ == "__main__":
app.run()


@ -0,0 +1,22 @@
from textual.app import App, ComposeResult
from textual_fastdatatable import ArrowBackend, DataTable
ROWS = [
("lane", "swimmer", "country", "time"),
(3, "Li Zhuhao", "China", 51.26),
("eight", None, "France", 51.58),
("seven", "Tom Shields", "United States", None),
(1, "Aleksandr Sadovnikov", "Russia", 51.84),
(None, "Darren Burns", "Scotland", 51.84),
]
class TableApp(App):
def compose(self) -> ComposeResult:
backend = ArrowBackend.from_records(ROWS, has_header=True)
yield DataTable(backend=backend, null_rep="[dim]∅ null[/]")
app = TableApp()
if __name__ == "__main__":
app.run()


@ -0,0 +1,33 @@
import csv
import io
from textual.app import App, ComposeResult
from textual_fastdatatable import ArrowBackend, DataTable
CSV = """lane,swimmer,country,time
4,Joseph Schooling,Singapore,50.39
2,Michael Phelps,United States,51.14
5,Chad le Clos,South Africa,51.14
6,László Cseh,Hungary,51.14
3,Li Zhuhao,China,51.26
8,Mehdy Metella,France,51.58
7,Tom Shields,United States,51.73
1,Aleksandr Sadovnikov,Russia,51.84"""
class TableApp(App):
def compose(self) -> ComposeResult:
rows = csv.reader(io.StringIO(CSV))
labels = next(rows)
data = [row for row in rows]
backend = ArrowBackend.from_pydict(
{label: [row[i] for row in data] for i, label in enumerate(labels)}
)
table = DataTable(backend=backend, cursor_type="range")
table.focus()
yield table
if __name__ == "__main__":
app = TableApp()
app.run()


@ -0,0 +1,45 @@
from textual.app import App, ComposeResult
from textual.binding import Binding
from textual_fastdatatable import ArrowBackend, DataTable
ROWS = [
("lane", "swimmer", "country", "time"),
(5, "Chad le Clos", "South Africa", 51.14),
(4, "Joseph Schooling", "Singapore", 50.39),
(2, "Michael Phelps", "United States", 51.14),
(6, "László Cseh", "Hungary", 51.14),
(3, "Li Zhuhao", "China", 51.26),
(8, "Mehdy Metella", "France", 51.58),
(7, "Tom Shields", "United States", 51.73),
(10, "Darren Burns", "Scotland", 51.84),
(1, "Aleksandr Sadovnikov", "Russia", 51.84),
]
class TableApp(App):
"""Snapshot app for testing removal of rows.
Removes several rows, so we can check that the display of the
DataTable updates as expected."""
BINDINGS = [
Binding("r", "remove_row", "Remove Row"),
]
def compose(self) -> ComposeResult:
backend = ArrowBackend.from_records(ROWS, has_header=True)
yield DataTable(backend=backend)
def on_mount(self) -> None:
table = self.query_one(DataTable)
table.focus()
def action_remove_row(self):
table = self.query_one(DataTable)
table.remove_row(2)
table.remove_row(4)
table.remove_row(6)
app = TableApp()
if __name__ == "__main__":
app.run()


@ -0,0 +1,36 @@
import csv
import io
from textual.app import App, ComposeResult
from textual_fastdatatable import ArrowBackend, DataTable
CSV = """lane,swimmer,country,time
4,Joseph Schooling,Singapore,50.39
2,Michael Phelps,United States,51.14
5,Chad le Clos,South Africa,51.14
6,László Cseh,Hungary,51.14
3,Li Zhuhao,China,51.26
8,Mehdy Metella,France,51.58
7,Tom Shields,United States,51.73
1,Aleksandr Sadovnikov,Russia,51.84"""
class TableApp(App):
def compose(self) -> ComposeResult:
rows = csv.reader(io.StringIO(CSV))
labels = next(rows)
data = [row for row in rows]
backend = ArrowBackend.from_pydict(
{label: [row[i] for row in data] for i, label in enumerate(labels)}
)
table = DataTable(backend=backend)
table.focus()
table.cursor_type = "row"
table.fixed_columns = 1
table.fixed_rows = 1
yield table
if __name__ == "__main__":
app = TableApp()
app.run()


@ -0,0 +1,37 @@
from textual.app import App, ComposeResult
from textual_fastdatatable import DataTable
ROWS = [
("lane", "swimmer", "country", "time"),
(5, "Chad le Clos", "South Africa", 51.14),
(4, "Joseph Schooling", "Singapore", 50.39),
(2, "Michael Phelps", "United States", 51.14),
(6, "László Cseh", "Hungary", 51.14),
(3, "Li Zhuhao", "China", 51.26),
(8, "Mehdy Metella", "France", 51.58),
(7, "Tom Shields", "United States", 51.73),
(10, "Darren Burns", "Scotland", 51.84),
(1, "Aleksandr Sadovnikov", "Russia", 51.84),
]
class TableApp(App):
def compose(self) -> ComposeResult:
yield DataTable()
def on_mount(self) -> None:
table = self.query_one(DataTable)
table.fixed_rows = 1
table.fixed_columns = 1
table.focus()
rows = iter(ROWS)
column_labels = next(rows)
for column in column_labels:
table.add_column(column, key=column)
for index, row in enumerate(rows):
table.add_row(*row, label=str(index))
app = TableApp()
if __name__ == "__main__":
app.run()


@ -0,0 +1,40 @@
from textual.app import App, ComposeResult
from textual.binding import Binding
from textual_fastdatatable import ArrowBackend, DataTable
# Shuffled around a bit to exercise sorting.
ROWS = [
("lane", "swimmer", "country", "time"),
(5, "Chad le Clos", "South Africa", 51.14),
(4, "Joseph Schooling", "Singapore", 50.39),
(2, "Michael Phelps", "United States", 51.14),
(6, "László Cseh", "Hungary", 51.14),
(3, "Li Zhuhao", "China", 51.26),
(8, "Mehdy Metella", "France", 51.58),
(7, "Tom Shields", "United States", 51.73),
(10, "Darren Burns", "Scotland", 51.84),
(1, "Aleksandr Sadovnikov", "Russia", 51.84),
]
class TableApp(App):
BINDINGS = [
Binding("s", "sort", "Sort"),
]
def compose(self) -> ComposeResult:
backend = ArrowBackend.from_records(ROWS, has_header=True)
yield DataTable(backend=backend)
def on_mount(self) -> None:
table = self.query_one(DataTable)
table.focus()
def action_sort(self):
table = self.query_one(DataTable)
table.sort([("time", "ascending"), ("lane", "ascending")])
app = TableApp()
if __name__ == "__main__":
app.run()


@ -0,0 +1,65 @@
from __future__ import annotations
from textual.app import App, ComposeResult
from textual.widgets import Label
from textual_fastdatatable import ArrowBackend, DataTable
from typing_extensions import Literal
data = [
"Severance",
"Foundation",
"Dark",
]
def make_datatable(
foreground_priority: Literal["css", "renderable"],
background_priority: Literal["css", "renderable"],
) -> DataTable:
backend = ArrowBackend.from_pydict(
{"Movies": [f"[red on blue]{row}" for row in data]}
)
table = DataTable(
backend=backend,
cursor_foreground_priority=foreground_priority,
cursor_background_priority=background_priority,
)
table.zebra_stripes = True
return table
class DataTableCursorStyles(App):
"""Regression test snapshot app which ensures that styles
are layered on top of each other correctly in the DataTable.
In this example, the colour of the text in the cells under
the cursor should not be red, because the CSS should be applied
on top."""
CSS = """
DataTable {margin-bottom: 1;}
DataTable > .datatable--cursor {
color: $secondary;
background: $success;
text-style: bold italic;
}
"""
def compose(self) -> ComposeResult:
priorities: list[
tuple[Literal["css", "renderable"], Literal["css", "renderable"]]
] = [
("css", "css"),
("css", "renderable"),
("renderable", "renderable"),
("renderable", "css"),
]
for foreground, background in priorities:
yield Label(f"Foreground is {foreground!r}, background is {background!r}:")
table = make_datatable(foreground, background)
yield table
app = DataTableCursorStyles()
if __name__ == "__main__":
app.run()


@ -0,0 +1,58 @@
from pathlib import Path
from textual.app import App, ComposeResult
from textual_fastdatatable import ArrowBackend, DataTable
CSS_PATH = (Path(__file__) / "../datatable_hot_reloading.tcss").resolve()
# Write some CSS to the file before the app loads.
# Then, the test will clear all the CSS to see if the
# hot reloading applies the changes correctly.
CSS_PATH.write_text(
"""\
DataTable > .datatable--cursor {
background: purple;
}
DataTable > .datatable--fixed {
background: red;
}
DataTable > .datatable--fixed-cursor {
background: blue;
}
DataTable > .datatable--header {
background: yellow;
}
DataTable > .datatable--odd-row {
background: pink;
}
DataTable > .datatable--even-row {
background: brown;
}
"""
)
class DataTableHotReloadingApp(App[None]):
CSS_PATH = CSS_PATH
def compose(self) -> ComposeResult:
data = {
            # the original test set column A's width to 10; we fake it here with spaces
"A ": ["one", "three", "five"],
"B": ["two", "four", "six"],
}
backend = ArrowBackend.from_pydict(data)
yield DataTable(backend, zebra_stripes=True, cursor_type="row", fixed_columns=1)
def on_mount(self) -> None:
self.query_one(DataTable)
if __name__ == "__main__":
app = DataTableHotReloadingApp()
app.run()


@ -0,0 +1 @@
/* This file is purposefully empty. */


@ -0,0 +1,12 @@
from textual.app import App, ComposeResult
from textual_fastdatatable import DataTable
class TableApp(App):
def compose(self) -> ComposeResult:
yield DataTable()
app = TableApp()
if __name__ == "__main__":
app.run()


@ -0,0 +1,17 @@
from textual.app import App, ComposeResult
from textual_fastdatatable import DataTable
class TableApp(App):
def compose(self) -> ComposeResult:
yield DataTable()
def on_mount(self) -> None:
table = self.query_one(DataTable)
table.add_column("Foo")
table.add_rows([("1",), ("2",)])
app = TableApp()
if __name__ == "__main__":
app.run()


@ -0,0 +1,16 @@
from pathlib import Path
from textual.app import App, ComposeResult
from textual_fastdatatable import DataTable
class TableApp(App):
def compose(self) -> ComposeResult:
yield DataTable(
data=Path(__file__).parent.parent.parent / "data" / "lap_times_100.parquet"
)
app = TableApp()
if __name__ == "__main__":
app.run()


@ -0,0 +1,20 @@
from textual.app import App, ComposeResult
from textual_fastdatatable import DataTable
DATA = {
"Foo": list(range(50)),
"Bar": ["0123456789"] * 50,
"Baz": ["IJKLMNOPQRSTUVWXYZ"] * 50,
}
class TableApp(App):
def compose(self) -> ComposeResult:
yield DataTable(
data=DATA, column_labels=["[red]Not Foo[/red]", "Zig", "[reverse]Zag[/]"]
)
app = TableApp()
if __name__ == "__main__":
app.run()


@ -0,0 +1,25 @@
from textual.app import App, ComposeResult
from textual_fastdatatable import DataTable
ROWS = [
("lane", "swimmer", "country", "time"),
(4, "Joseph Schooling", "Singapore", 50.39),
(2, "Michael Phelps", "United States", 51.14),
(5, "Chad le Clos", "South Africa", 51.14),
(6, "László Cseh", "Hungary", 51.14),
(3, "Li Zhuhao", "China", 51.26),
(8, "Mehdy Metella", "France", 51.58),
(7, "Tom Shields", "United States", 51.73),
(1, "Aleksandr Sadovnikov", "Russia", 51.84),
(10, "Darren Burns", "Scotland", 51.84),
]
class TableApp(App):
def compose(self) -> ComposeResult:
yield DataTable(data=ROWS)
app = TableApp()
if __name__ == "__main__":
app.run()


@ -0,0 +1,12 @@
from textual.app import App, ComposeResult
from textual_fastdatatable import DataTable
class TableApp(App):
def compose(self) -> ComposeResult:
yield DataTable(column_labels=["foo [red]foo[/red]", "bar"])
app = TableApp()
if __name__ == "__main__":
app.run()


@ -0,0 +1,12 @@
from textual.app import App, ComposeResult
from textual_fastdatatable import DataTable
class TableApp(App):
def compose(self) -> ComposeResult:
yield DataTable(column_labels=["foo [red]foo[/red]", "bar"])
app = TableApp()
if __name__ == "__main__":
app.run()


@ -0,0 +1,119 @@
from pathlib import Path
from typing import Callable
import pytest
# These paths should be relative to THIS directory.
SNAPSHOT_APPS_DIR = Path("./snapshot_apps")
def test_auto_table(snap_compare: Callable) -> None:
assert snap_compare(SNAPSHOT_APPS_DIR / "auto-table.py", terminal_size=(120, 40))
def test_datatable_render(snap_compare: Callable) -> None:
press = ["down", "down", "right", "up", "left"]
assert snap_compare(SNAPSHOT_APPS_DIR / "data_table.py", press=press)
def test_datatable_row_cursor_render(snap_compare: Callable) -> None:
press = ["up", "left", "right", "down", "down"]
assert snap_compare(SNAPSHOT_APPS_DIR / "data_table_row_cursor.py", press=press)
def test_datatable_no_render_markup(snap_compare: Callable) -> None:
assert snap_compare(SNAPSHOT_APPS_DIR / "data_table_no_render_markup.py")
def test_datatable_null_mixed_cols(snap_compare: Callable) -> None:
assert snap_compare(SNAPSHOT_APPS_DIR / "data_table_null_mixed_cols.py")
def test_datatable_range_cursor_render(snap_compare: Callable) -> None:
press = ["right", "down", "shift+right", "shift+down", "shift+down"]
assert snap_compare(SNAPSHOT_APPS_DIR / "data_table_range_cursor.py", press=press)
def test_datatable_column_cursor_render(snap_compare: Callable) -> None:
press = ["left", "up", "down", "right", "right"]
assert snap_compare(SNAPSHOT_APPS_DIR / "data_table_column_cursor.py", press=press)
def test_datatable_max_width_render(snap_compare: Callable) -> None:
press = ["right", "down", "shift+right", "shift+down", "shift+down"]
assert snap_compare(SNAPSHOT_APPS_DIR / "data_table_max_width.py", press=press)
def test_datatable_sort_multikey(snap_compare: Callable) -> None:
press = ["down", "right", "s"] # Also checks that sort doesn't move cursor.
assert snap_compare(SNAPSHOT_APPS_DIR / "data_table_sort.py", press=press)
def test_datatable_remove_row(snap_compare: Callable) -> None:
press = ["r"]
assert snap_compare(SNAPSHOT_APPS_DIR / "data_table_remove_row.py", press=press)
@pytest.mark.skip(reason="Don't support row labels.")
def test_datatable_labels_and_fixed_data(snap_compare: Callable) -> None:
# Ensure that we render correctly when there are fixed rows/cols and labels.
assert snap_compare(SNAPSHOT_APPS_DIR / "data_table_row_labels.py")
# skip, don't xfail; see: https://github.com/Textualize/pytest-textual-snapshot/issues/6
@pytest.mark.skip(
reason=(
"The data in this test includes markup; the backend doesn't"
"know these have zero width, so we draw the column wider than we used to"
)
)
def test_datatable_style_ordering(snap_compare: Callable) -> None:
    # Regression test for https://github.com/Textualize/textual/issues/2061
assert snap_compare(SNAPSHOT_APPS_DIR / "data_table_style_order.py")
def test_datatable_add_column(snap_compare: Callable) -> None:
# Checking adding columns after adding rows
assert snap_compare(SNAPSHOT_APPS_DIR / "data_table_add_column.py")
@pytest.mark.skip(reason="No multi-height rows. No Rich objects.")
def test_datatable_add_row_auto_height(snap_compare: Callable) -> None:
# Check that rows added with auto height computation look right.
assert snap_compare(SNAPSHOT_APPS_DIR / "data_table_add_row_auto_height.py")
@pytest.mark.skip(reason="No multi-height rows. No Rich objects.")
def test_datatable_add_row_auto_height_sorted(snap_compare: Callable) -> None:
# Check that rows added with auto height computation look right.
assert snap_compare(
SNAPSHOT_APPS_DIR / "data_table_add_row_auto_height.py", press=["s"]
)
def test_datatable_empty(snap_compare: Callable) -> None:
assert snap_compare(SNAPSHOT_APPS_DIR / "empty.py")
def test_datatable_empty_add_col(snap_compare: Callable) -> None:
assert snap_compare(SNAPSHOT_APPS_DIR / "empty_add_col.py")
def test_datatable_no_rows(snap_compare: Callable) -> None:
assert snap_compare(SNAPSHOT_APPS_DIR / "no_rows.py")
def test_datatable_no_rows_empty_sequence(snap_compare: Callable) -> None:
assert snap_compare(SNAPSHOT_APPS_DIR / "no_rows_empty_sequence.py")
def test_datatable_from_parquet(snap_compare: Callable) -> None:
assert snap_compare(SNAPSHOT_APPS_DIR / "from_parquet.py")
def test_datatable_from_records(snap_compare: Callable) -> None:
assert snap_compare(SNAPSHOT_APPS_DIR / "from_records.py")
def test_datatable_from_pydict(snap_compare: Callable) -> None:
assert snap_compare(SNAPSHOT_APPS_DIR / "from_pydict_with_col_labels.py")


@ -0,0 +1,94 @@
from __future__ import annotations
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import Sequence
import pyarrow as pa
import pyarrow.parquet as pq
from textual_fastdatatable import ArrowBackend
def test_from_records(records: list[tuple[str | int, ...]]) -> None:
backend = ArrowBackend.from_records(records, has_header=True)
assert backend.column_count == 3
assert backend.row_count == 5
assert tuple(backend.columns) == records[0]
def test_from_records_no_header(records: list[tuple[str | int, ...]]) -> None:
backend = ArrowBackend.from_records(records[1:], has_header=False)
assert backend.column_count == 3
assert backend.row_count == 5
assert tuple(backend.columns) == ("f0", "f1", "f2")
def test_from_pydict(pydict: dict[str, Sequence[str | int]]) -> None:
backend = ArrowBackend.from_pydict(pydict)
assert backend.column_count == 3
assert backend.row_count == 5
assert backend.source_row_count == 5
assert tuple(backend.columns) == tuple(pydict.keys())
def test_from_pydict_with_limit(pydict: dict[str, Sequence[str | int]]) -> None:
backend = ArrowBackend.from_pydict(pydict, max_rows=2)
assert backend.column_count == 3
assert backend.row_count == 2
assert backend.source_row_count == 5
assert tuple(backend.columns) == tuple(pydict.keys())
def test_from_parquet(pydict: dict[str, Sequence[str | int]], tmp_path: Path) -> None:
tbl = pa.Table.from_pydict(pydict)
p = tmp_path / "test.parquet"
    pq.write_table(tbl, str(p))
backend = ArrowBackend.from_parquet(p)
assert backend.data.equals(tbl)
def test_empty_query() -> None:
data: dict[str, list] = {"a": []}
backend = ArrowBackend.from_pydict(data)
assert backend.column_content_widths == [0]
def test_dupe_column_labels() -> None:
arr = pa.array([0, 1, 2, 3])
tab = pa.table([arr] * 3, names=["a", "a", "a"])
backend = ArrowBackend(data=tab)
assert backend.column_count == 3
assert backend.row_count == 4
assert backend.get_row_at(2) == [2, 2, 2]
def test_timestamp_with_tz() -> None:
"""
    Ensure datetimes with UTC offsets (but no timezone names) do not crash
    the data table when casting to string.
"""
dt = datetime(2024, 1, 1, hour=15, tzinfo=timezone(offset=timedelta(hours=-5)))
arr = pa.array([dt, dt, dt])
tab = pa.table([arr], names=["created_at"])
backend = ArrowBackend(data=tab)
assert backend.column_content_widths == [29]
def test_mixed_types() -> None:
data = [(1000,), ("hi",)]
backend = ArrowBackend.from_records(records=data)
assert backend
assert backend.row_count == 2
assert backend.get_row_at(0) == ["1000"]
assert backend.get_row_at(1) == ["hi"]
def test_negative_timestamps() -> None:
dt = datetime(1, 1, 1, tzinfo=timezone.utc)
arr = pa.array([dt, dt, dt], type=pa.timestamp("s", tz="America/New_York"))
tab = pa.table([arr], names=["created_at"])
backend = ArrowBackend(data=tab)
assert backend.column_content_widths == [26]
assert backend.get_column_at(0) == [datetime.min, datetime.min, datetime.min]
assert backend.get_row_at(0) == [datetime.min]
assert backend.get_cell_at(0, 0) is datetime.min


@ -0,0 +1,109 @@
from __future__ import annotations
import pytest
from textual_fastdatatable.backend import DataTableBackend
def test_column_content_widths(backend: DataTableBackend) -> None:
assert backend.column_content_widths == [1, 8, 6]
def test_get_row_at(backend: DataTableBackend) -> None:
assert backend.get_row_at(0) == [1, "a", "foo"]
assert backend.get_row_at(4) == [5, "asdfasdf", "foofoo"]
with pytest.raises(IndexError):
backend.get_row_at(10)
with pytest.raises(IndexError):
backend.get_row_at(-1)
def test_get_column_at(backend: DataTableBackend) -> None:
assert backend.get_column_at(0) == [1, 2, 3, 4, 5]
assert backend.get_column_at(2) == ["foo", "bar", "baz", "qux", "foofoo"]
with pytest.raises(IndexError):
backend.get_column_at(10)
def test_get_cell_at(backend: DataTableBackend) -> None:
assert backend.get_cell_at(0, 0) == 1
assert backend.get_cell_at(4, 1) == "asdfasdf"
with pytest.raises(IndexError):
backend.get_cell_at(10, 0)
with pytest.raises(IndexError):
backend.get_cell_at(0, 10)
def test_append_column(backend: DataTableBackend) -> None:
original_table = backend.data
backend.append_column("new")
assert backend.column_count == 4
assert backend.row_count == 5
assert backend.get_column_at(3) == [None] * backend.row_count
backend.append_column("def", default="zzz")
assert backend.column_count == 5
assert backend.row_count == 5
assert backend.get_column_at(4) == ["zzz"] * backend.row_count
assert backend.data.select(["first column", "two", "three"]).equals(original_table)
def test_append_rows(backend: DataTableBackend) -> None:
original_table = backend.data
backend.append_rows([(6, "w", "x"), (7, "y", "z")])
assert backend.column_count == 3
assert backend.row_count == 7
assert backend.column_content_widths == [1, 8, 6]
backend.append_rows([(999, "w" * 12, "x" * 15)])
assert backend.column_count == 3
assert backend.row_count == 8
assert backend.column_content_widths == [3, 12, 15]
assert backend.data.slice(0, 5).equals(original_table)
def test_drop_row(backend: DataTableBackend) -> None:
backend.drop_row(0)
assert backend.row_count == 4
assert backend.column_count == 3
assert backend.column_content_widths == [1, 8, 6]
backend.drop_row(3)
assert backend.row_count == 3
assert backend.column_count == 3
assert backend.column_content_widths == [1, 1, 3]
with pytest.raises(IndexError):
backend.drop_row(3)
def test_update_cell(backend: DataTableBackend) -> None:
backend.update_cell(0, 0, 0)
assert backend.get_column_at(0) == [0, 2, 3, 4, 5]
assert backend.row_count == 5
assert backend.column_count == 3
assert backend.column_content_widths == [1, 8, 6]
backend.update_cell(3, 1, "z" * 50)
assert backend.get_row_at(3) == [4, "z" * 50, "qux"]
assert backend.row_count == 5
assert backend.column_count == 3
assert backend.column_content_widths == [1, 50, 6]
def test_sort(backend: DataTableBackend) -> None:
original_table = backend.data
original_col_one = list(backend.get_column_at(0)).copy()
original_col_two = list(backend.get_column_at(1)).copy()
backend.sort(by="two")
assert backend.get_column_at(0) != original_col_one
assert backend.get_column_at(1) == sorted(original_col_two)
backend.sort(by=[("two", "descending")])
assert backend.get_column_at(0) != original_col_one
assert backend.get_column_at(1) == sorted(original_col_two, reverse=True)
backend.sort(by=[("first column", "ascending")])
assert backend.data.equals(original_table)


@ -0,0 +1,54 @@
from datetime import date, datetime
import pyarrow as pa
from textual_fastdatatable.backend import create_backend
MAX_32BIT_INT = 2**31 - 1
MAX_64BIT_INT = 2**63 - 1
def test_empty_sequence() -> None:
backend = create_backend(data=[])
assert backend
assert backend.row_count == 0
assert backend.column_count == 0
assert backend.columns == []
assert backend.column_content_widths == []
def test_infinity_timestamps() -> None:
from_py = create_backend(
data={"dt": [date.max, date.min], "ts": [datetime.max, datetime.min]}
)
assert from_py
assert from_py.row_count == 2
from_arrow = create_backend(
data=pa.table(
{
"dt32": [
pa.scalar(MAX_32BIT_INT, type=pa.date32()),
pa.scalar(-MAX_32BIT_INT, type=pa.date32()),
],
"dt64": [
pa.scalar(MAX_64BIT_INT, type=pa.date64()),
pa.scalar(-MAX_64BIT_INT, type=pa.date64()),
],
"ts": [
pa.scalar(MAX_64BIT_INT, type=pa.timestamp("s")),
pa.scalar(-MAX_64BIT_INT, type=pa.timestamp("s")),
],
"tns": [
pa.scalar(MAX_64BIT_INT, type=pa.timestamp("ns")),
pa.scalar(-MAX_64BIT_INT, type=pa.timestamp("ns")),
],
}
)
)
assert from_arrow
assert from_arrow.row_count == 2
assert from_arrow.get_row_at(0) == [date.max, date.max, datetime.max, datetime.max]
assert from_arrow.get_row_at(1) == [date.min, date.min, datetime.min, datetime.min]
assert from_arrow.get_column_at(0) == [date.max, date.min]
assert from_arrow.get_column_at(2) == [datetime.max, datetime.min]
assert from_arrow.get_cell_at(0, 0) == date.max