1
0
Fork 0

Merging upstream version 26.1.3.

Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
Daniel Baumann 2025-02-13 21:59:50 +01:00
parent 829d661a08
commit c8d4d2df63
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
119 changed files with 71635 additions and 68059 deletions

View file

@ -1,10 +1,12 @@
use pyo3::prelude::*;
use std::collections::{HashMap, HashSet};
use pyo3::prelude::*;
pub type TokenType = u16;
#[derive(Clone, Debug)]
#[pyclass]
#[cfg_attr(feature = "profiling", derive(serde::Serialize, serde::Deserialize))]
pub struct TokenTypeSettings {
pub bit_string: TokenType,
pub break_: TokenType,
@ -41,7 +43,7 @@ impl TokenTypeSettings {
heredoc_string_alternative: TokenType,
hint: TokenType,
) -> Self {
TokenTypeSettings {
let token_type_settings = TokenTypeSettings {
bit_string,
break_,
dcolon,
@ -56,12 +58,31 @@ impl TokenTypeSettings {
var,
heredoc_string_alternative,
hint,
};
#[cfg(feature = "profiling")]
{
token_type_settings.write_json_to_string();
}
token_type_settings
}
}
#[cfg(feature = "profiling")]
impl TokenTypeSettings {
    /// Serializes these settings to JSON and writes them to
    /// `benches/token_type_settings.json` (relative to the crate root),
    /// so benchmark runs can replay the exact configuration that was
    /// handed over from the Python side.
    ///
    /// NOTE(review): despite the name, this writes to a *file*, not a
    /// string; the name is kept because `new()` already calls it.
    ///
    /// # Panics
    /// Panics if serialization or the file write fails. This helper is
    /// compiled only under the `profiling` feature, where failing loudly
    /// is preferable to silently benchmarking stale settings.
    pub fn write_json_to_string(&self) {
        let json = serde_json::to_string(self)
            .expect("TokenTypeSettings should always serialize to JSON");
        let path = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
            .join("benches/token_type_settings.json");
        std::fs::write(&path, &json)
            .expect("failed to write benches/token_type_settings.json");
    }
}
#[derive(Clone, Debug)]
#[pyclass]
#[cfg_attr(feature = "profiling", derive(serde::Serialize, serde::Deserialize))]
pub struct TokenizerSettings {
pub white_space: HashMap<char, TokenType>,
pub single_tokens: HashMap<char, TokenType>,
@ -141,7 +162,7 @@ impl TokenizerSettings {
let var_single_tokens_native: HashSet<char> =
var_single_tokens.iter().map(&to_char).collect();
TokenizerSettings {
let tokenizer_settings = TokenizerSettings {
white_space: white_space_native,
single_tokens: single_tokens_native,
keywords,
@ -162,15 +183,35 @@ impl TokenizerSettings {
string_escapes_allowed_in_raw_strings,
nested_comments,
hint_start,
};
#[cfg(feature = "profiling")]
{
tokenizer_settings.write_json_to_string();
}
tokenizer_settings
}
}
#[cfg(feature = "profiling")]
impl TokenizerSettings {
    /// Serializes these settings to JSON and writes them to
    /// `benches/tokenizer_settings.json` (relative to the crate root),
    /// mirroring `TokenTypeSettings::write_json_to_string`.
    ///
    /// NOTE(review): despite the name, this writes to a *file*, not a
    /// string; the name is kept because `new()` already calls it.
    ///
    /// # Panics
    /// Panics if serialization or the file write fails. This helper is
    /// compiled only under the `profiling` feature, where failing loudly
    /// is preferable to silently benchmarking stale settings.
    pub fn write_json_to_string(&self) {
        let json = serde_json::to_string(self)
            .expect("TokenizerSettings should always serialize to JSON");
        let path = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
            .join("benches/tokenizer_settings.json");
        std::fs::write(&path, &json)
            .expect("failed to write benches/tokenizer_settings.json");
    }
}
/// Dialect-specific tokenizer switches, constructed from the Python side
/// via the `#[pymethods]` constructor below.
#[derive(Clone, Debug)]
#[pyclass]
#[cfg_attr(feature = "profiling", derive(serde::Serialize, serde::Deserialize))]
pub struct TokenizerDialectSettings {
    // Escape-sequence translation table; presumably maps an escaped form to
    // its unescaped replacement — TODO confirm direction against the callers.
    pub unescaped_sequences: HashMap<String, String>,
    // Whether an identifier may begin with a digit in this dialect.
    pub identifiers_can_start_with_digit: bool,
    // Whether numeric literals may use `_` as a digit separator.
    pub numbers_can_be_underscore_separated: bool,
}
#[pymethods]
@ -179,10 +220,29 @@ impl TokenizerDialectSettings {
pub fn new(
unescaped_sequences: HashMap<String, String>,
identifiers_can_start_with_digit: bool,
numbers_can_be_underscore_separated: bool,
) -> Self {
TokenizerDialectSettings {
let settings = TokenizerDialectSettings {
unescaped_sequences,
identifiers_can_start_with_digit,
numbers_can_be_underscore_separated,
};
#[cfg(feature = "profiling")]
{
settings.write_json_to_string();
}
settings
}
}
#[cfg(feature = "profiling")]
impl TokenizerDialectSettings {
    /// Serializes these settings to JSON and writes them to
    /// `benches/tokenizer_dialect_settings.json` (relative to the crate
    /// root), mirroring the other profiling dump helpers in this file.
    ///
    /// NOTE(review): despite the name, this writes to a *file*, not a
    /// string; the name is kept because `new()` already calls it.
    ///
    /// # Panics
    /// Panics if serialization or the file write fails. This helper is
    /// compiled only under the `profiling` feature, where failing loudly
    /// is preferable to silently benchmarking stale settings.
    pub fn write_json_to_string(&self) {
        let json = serde_json::to_string(self)
            .expect("TokenizerDialectSettings should always serialize to JSON");
        let path = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
            .join("benches/tokenizer_dialect_settings.json");
        std::fs::write(&path, &json)
            .expect("failed to write benches/tokenizer_dialect_settings.json");
    }
}