
Adding upstream version 26.2.1.

Signed-off-by: Daniel Baumann <daniel@debian.org>
Daniel Baumann 2025-02-13 21:59:57 +01:00
parent d908bee480
commit 7ee28625fb
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
85 changed files with 57142 additions and 52288 deletions

sqlglotrs/Cargo.lock (generated)
View file

@@ -448,6 +448,12 @@ version = "0.8.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"

+[[package]]
+name = "rustc-hash"
+version = "2.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c7fb8039b3032c191086b10f11f319a6e99e1e82889c5cc6046f515c9db1d497"
+
 [[package]]
 name = "ryu"
 version = "1.0.18"
@@ -497,10 +503,11 @@ dependencies = [
 [[package]]
 name = "sqlglotrs"
-version = "0.3.4"
+version = "0.3.5"
 dependencies = [
  "criterion",
  "pyo3",
+ "rustc-hash",
  "serde",
  "serde_json",
  "sqlglotrs",

View file

@@ -1,12 +1,13 @@
 [package]
 name = "sqlglotrs"
-version = "0.3.4"
+version = "0.3.5"
 edition = "2021"
 license = "MIT"

 [lib]
 name = "sqlglotrs"
 crate-type = ["cdylib", "rlib"]
+bench = false

 [[bench]]
 name = "long"
@@ -19,6 +20,7 @@ profiling = ["serde", "serde_json"]
 [dependencies]
 pyo3 = {version ="0.22.6", features = ["auto-initialize"]}
+rustc-hash = { version = "2.1" }

 # Optional dependencies used for profiling
 serde = { version = "1", features = ["derive"] , optional = true }
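
Note on the dependency block above: serde and serde_json stay optional and are only pulled in through the profiling feature, while rustc-hash becomes an unconditional dependency. A minimal, hypothetical sketch of how a feature-gated optional dependency like this is typically consumed in Rust code (the names TimingSample and dump_samples are illustrative, not part of sqlglotrs):

    // Compiled only when the crate is built with `--features profiling`,
    // which is what makes the optional serde/serde_json deps available.
    #[cfg(feature = "profiling")]
    use serde::Serialize;

    #[cfg_attr(feature = "profiling", derive(Serialize))]
    pub struct TimingSample {
        pub label: String,
        pub nanos: u128,
    }

    #[cfg(feature = "profiling")]
    pub fn dump_samples(samples: &[TimingSample]) -> serde_json::Result<String> {
        serde_json::to_string_pretty(samples)
    }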

View file

@@ -1,6 +1,6 @@
-use std::collections::{HashMap, HashSet};
 use pyo3::prelude::*;
+use rustc_hash::FxHashMap as HashMap;
+use rustc_hash::FxHashSet as HashSet;

 pub type TokenType = u16;
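
The aliased imports above make rustc-hash's FxHashMap/FxHashSet drop-in replacements for the std types throughout the crate, trading SipHash's DoS resistance for speed on the small keys the tokenizer works with. A small self-contained sketch of the same aliasing pattern, outside of sqlglotrs:

    use rustc_hash::FxHashMap as HashMap;

    fn demo() {
        // FxHashMap is std's HashMap with a non-default hasher, so it has no
        // `new()`; `default()` is the way to construct an empty map.
        let mut keywords: HashMap<String, u16> = HashMap::default();
        keywords.insert("SELECT".to_string(), 1);
        assert_eq!(keywords.get("SELECT"), Some(&1));
    }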

View file

@@ -49,13 +49,11 @@ impl Token {
     pub fn append_comments(&self, comments: &mut Vec<String>) {
         Python::with_gil(|py| {
             let pylist = self.comments.bind(py);
-            for comment in comments.iter() {
+            for comment in comments.drain(..) {
                 if let Err(_) = pylist.append(comment) {
                     panic!("Failed to append comments to the Python list");
                 }
             }
         });
-        // Simulate `Vec::append`.
-        let _ = std::mem::replace(comments, Vec::new());
     }
 }
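
Using drain(..) moves each comment String out of the vector and leaves it empty once the loop finishes, which is why the trailing mem::replace that simulated Vec::append could be deleted. A minimal sketch of the idiom without the PyO3 list involved:

    // drain(..) yields owned Strings and empties `src` in place, so no
    // follow-up `std::mem::replace(src, Vec::new())` is needed.
    fn move_all(src: &mut Vec<String>, dst: &mut Vec<String>) {
        for item in src.drain(..) {
            dst.push(item); // moved, not cloned
        }
        assert!(src.is_empty());
    }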

View file

@@ -23,14 +23,11 @@ pub struct Tokenizer {
 impl Tokenizer {
     #[new]
     pub fn new(settings: TokenizerSettings, token_types: TokenTypeSettings) -> Tokenizer {
-        let mut keyword_trie = Trie::new();
-        let single_token_strs: Vec<String> = settings
-            .single_tokens
-            .keys()
-            .map(|s| s.to_string())
-            .collect();
-        let trie_filter =
-            |key: &&String| key.contains(" ") || single_token_strs.iter().any(|t| key.contains(t));
+        let mut keyword_trie = Trie::default();
+        let trie_filter = |key: &&String| {
+            key.contains(" ") || settings.single_tokens.keys().any(|&t| key.contains(t))
+        };

         keyword_trie.add(settings.keywords.keys().filter(trie_filter));
         keyword_trie.add(settings.comments.keys().filter(trie_filter));
@@ -114,7 +111,7 @@ impl<'a> TokenizerState<'a> {
     fn tokenize(&mut self) -> Result<Vec<Token>, TokenizerError> {
         self.scan(None)?;
-        Ok(std::mem::replace(&mut self.tokens, Vec::new()))
+        Ok(std::mem::take(&mut self.tokens))
     }

     fn scan(&mut self, until_peek_char: Option<char>) -> Result<(), TokenizerError> {
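
std::mem::take(&mut x) is shorthand for std::mem::replace(&mut x, Default::default()), so the hunk above still hands back the accumulated token vector and leaves an empty one behind, just more idiomatically. For example:

    fn take_all(tokens: &mut Vec<u32>) -> Vec<u32> {
        // Same effect as std::mem::replace(tokens, Vec::new()).
        std::mem::take(tokens)
    }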
@@ -146,7 +143,7 @@ impl<'a> TokenizerState<'a> {
         }
         if !self.settings.white_space.contains_key(&self.current_char) {
-            if self.current_char.is_digit(10) {
+            if self.current_char.is_ascii_digit() {
                 self.scan_number()?;
             } else if let Some(identifier_end) =
                 self.settings.identifiers.get(&self.current_char)
@@ -205,7 +202,7 @@ impl<'a> TokenizerState<'a> {
     }

     fn char_at(&self, index: usize) -> Result<char, TokenizerError> {
-        self.sql.get(index).map(|c| *c).ok_or_else(|| {
+        self.sql.get(index).copied().ok_or_else(|| {
             self.error(format!(
                 "Index {} is out of bound (size {})",
                 index, self.size
@@ -237,7 +234,7 @@ impl<'a> TokenizerState<'a> {
             self.column,
             self.start,
             self.current - 1,
-            std::mem::replace(&mut self.comments, Vec::new()),
+            std::mem::take(&mut self.comments),
         ));

         // If we have either a semicolon or a begin token before the command's token, we'll parse
@@ -503,7 +500,7 @@ impl<'a> TokenizerState<'a> {
         let mut scientific = 0;

         loop {
-            if self.peek_char.is_digit(10) {
+            if self.peek_char.is_ascii_digit() {
                 self.advance(1)?;
             } else if self.peek_char == '.' && !decimal {
                 if self.tokens.last().map(|t| t.token_type) == Some(self.token_types.parameter) {
@@ -537,8 +534,7 @@ impl<'a> TokenizerState<'a> {
                     .numeric_literals
                     .get(&literal.to_uppercase())
                     .unwrap_or(&String::from("")),
-            )
-            .map(|x| *x);
+            ).copied();

             let replaced = literal.replace("_", "");
@@ -607,8 +603,7 @@ impl<'a> TokenizerState<'a> {
         } else {
             self.settings
                 .keywords
-                .get(&self.text().to_uppercase())
-                .map(|x| *x)
+                .get(&self.text().to_uppercase()).copied()
                 .unwrap_or(self.token_types.var)
         };
         self.add(token_type, None)
@@ -718,13 +713,13 @@ impl<'a> TokenizerState<'a> {
             if i == 0 {
                 self.is_alphabetic_or_underscore(c)
             } else {
-                self.is_alphabetic_or_underscore(c) || c.is_digit(10)
+                self.is_alphabetic_or_underscore(c) || c.is_ascii_digit()
             }
         })
     }

     fn is_numeric(&mut self, s: &str) -> bool {
-        s.chars().all(|c| c.is_digit(10))
+        s.chars().all(|c| c.is_ascii_digit())
     }

     fn extract_value(&mut self) -> Result<String, TokenizerError> {
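
The remaining tokenizer hunks are mechanical clippy-style cleanups: char::is_ascii_digit() replaces is_digit(10) (identical result for radix 10, without the radix argument that can panic on invalid values), and Option::copied() replaces .map(|x| *x) on Option<&T> for Copy types. A small sketch assuming a &[char] buffer like the tokenizer's sql field:

    fn digit_at(sql: &[char], index: usize) -> Option<char> {
        // .get() returns Option<&char>; .copied() replaces .map(|c| *c).
        let c = sql.get(index).copied()?;
        // is_ascii_digit() is the clearer spelling of is_digit(10).
        if c.is_ascii_digit() { Some(c) } else { None }
    }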

View file

@@ -1,6 +1,6 @@
-use std::collections::HashMap;
+use rustc_hash::FxHashMap as HashMap;

-#[derive(Debug)]
+#[derive(Debug, Default)]
 pub struct TrieNode {
     is_word: bool,
     children: HashMap<char, TrieNode>,
@@ -35,21 +35,12 @@ impl TrieNode {
     }
 }

-#[derive(Debug)]
+#[derive(Debug, Default)]
 pub struct Trie {
     pub root: TrieNode,
 }

 impl Trie {
-    pub fn new() -> Self {
-        Trie {
-            root: TrieNode {
-                is_word: false,
-                children: HashMap::new(),
-            },
-        }
-    }
-
     pub fn add<'a, I>(&mut self, keys: I)
     where
         I: Iterator<Item = &'a String>,
@@ -59,7 +50,7 @@ impl Trie {
             for c in key.chars() {
                 current = current.children.entry(c).or_insert(TrieNode {
                     is_word: false,
-                    children: HashMap::new(),
+                    children: HashMap::default(),
                 });
             }
             current.is_word = true;
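
Deriving Default for TrieNode and Trie makes the hand-written Trie::new() redundant, since a defaulted node is exactly what the old constructor built: is_word == false and an empty children map (FxHashMap has no new(), which is also why the or_insert call switches to HashMap::default()). A condensed sketch of the resulting construction path:

    use rustc_hash::FxHashMap as HashMap;

    #[derive(Debug, Default)]
    struct TrieNode {
        is_word: bool,
        children: HashMap<char, TrieNode>,
    }

    #[derive(Debug, Default)]
    struct Trie {
        root: TrieNode,
    }

    fn build_empty() -> Trie {
        // Equivalent to what the removed Trie::new() returned.
        Trie::default()
    }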