Adding upstream version 26.2.1.
Signed-off-by: Daniel Baumann <daniel@debian.org>
parent d908bee480
commit 7ee28625fb
85 changed files with 57142 additions and 52288 deletions
sqlglotrs/Cargo.lock (generated): 9 lines changed
@@ -448,6 +448,12 @@ version = "0.8.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
 
+[[package]]
+name = "rustc-hash"
+version = "2.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c7fb8039b3032c191086b10f11f319a6e99e1e82889c5cc6046f515c9db1d497"
+
 [[package]]
 name = "ryu"
 version = "1.0.18"
@@ -497,10 +503,11 @@ dependencies = [
 
 [[package]]
 name = "sqlglotrs"
-version = "0.3.4"
+version = "0.3.5"
 dependencies = [
  "criterion",
  "pyo3",
+ "rustc-hash",
  "serde",
  "serde_json",
 "sqlglotrs",
@@ -1,12 +1,13 @@
 [package]
 name = "sqlglotrs"
-version = "0.3.4"
+version = "0.3.5"
 edition = "2021"
 license = "MIT"
 
 [lib]
 name = "sqlglotrs"
 crate-type = ["cdylib", "rlib"]
 bench = false
 
 [[bench]]
 name = "long"
@@ -19,6 +20,7 @@ profiling = ["serde", "serde_json"]
 
 [dependencies]
 pyo3 = {version ="0.22.6", features = ["auto-initialize"]}
+rustc-hash = { version = "2.1" }
 
 # Optional dependencies used for profiling
 serde = { version = "1", features = ["derive"] , optional = true }
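The only new runtime dependency in this manifest hunk is rustc-hash, which provides FxHashMap and FxHashSet: the standard collections paired with a faster, non-DoS-resistant hasher. A minimal usage sketch, not taken from the diff (the map contents are made up for illustration):

use rustc_hash::{FxHashMap, FxHashSet};

fn main() {
    // Same API as std::collections::HashMap/HashSet, different default hasher,
    // so the types are constructed via Default rather than new().
    let mut keywords: FxHashMap<String, u16> = FxHashMap::default();
    keywords.insert("SELECT".to_string(), 1);

    let mut single_tokens: FxHashSet<char> = FxHashSet::default();
    single_tokens.insert('(');

    println!("{} keywords, {} single tokens", keywords.len(), single_tokens.len());
}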
@@ -1,6 +1,6 @@
-use std::collections::{HashMap, HashSet};
-
 use pyo3::prelude::*;
+use rustc_hash::FxHashMap as HashMap;
+use rustc_hash::FxHashSet as HashSet;
 
 pub type TokenType = u16;
 
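Importing the Fx types under the old names (FxHashMap as HashMap, FxHashSet as HashSet) keeps the rest of the module source-compatible with the previous std::collections imports. A short sketch of the same trick, not part of the diff:

use rustc_hash::FxHashMap as HashMap;

pub type TokenType = u16;

fn main() {
    // Call sites keep writing `HashMap<...>`; only the import changed.
    let mut keywords: HashMap<String, TokenType> = HashMap::default();
    keywords.insert("SELECT".to_string(), 1);
    assert_eq!(keywords.get("SELECT"), Some(&1));
}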
@@ -49,13 +49,11 @@ impl Token {
     pub fn append_comments(&self, comments: &mut Vec<String>) {
         Python::with_gil(|py| {
             let pylist = self.comments.bind(py);
-            for comment in comments.iter() {
+            for comment in comments.drain(..) {
                 if let Err(_) = pylist.append(comment) {
                     panic!("Failed to append comments to the Python list");
                 }
             }
         });
-        // Simulate `Vec::append`.
-        let _ = std::mem::replace(comments, Vec::new());
     }
 }
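The loop change is why the trailing replace call could go: drain(..) yields the comments as owned Strings and leaves the vector empty afterwards, so the explicit std::mem::replace(comments, Vec::new()) became redundant. A standalone sketch of the behaviour (names are illustrative, not the crate's API):

fn append_comments(comments: &mut Vec<String>, sink: &mut Vec<String>) {
    // drain(..) moves every String out and empties `comments` in place.
    for comment in comments.drain(..) {
        sink.push(comment);
    }
}

fn main() {
    let mut comments = vec!["-- a".to_string(), "/* b */".to_string()];
    let mut sink = Vec::new();
    append_comments(&mut comments, &mut sink);
    assert!(comments.is_empty());
    assert_eq!(sink.len(), 2);
}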
@@ -23,14 +23,11 @@ pub struct Tokenizer {
 impl Tokenizer {
     #[new]
     pub fn new(settings: TokenizerSettings, token_types: TokenTypeSettings) -> Tokenizer {
-        let mut keyword_trie = Trie::new();
-        let single_token_strs: Vec<String> = settings
-            .single_tokens
-            .keys()
-            .map(|s| s.to_string())
-            .collect();
-        let trie_filter =
-            |key: &&String| key.contains(" ") || single_token_strs.iter().any(|t| key.contains(t));
+        let mut keyword_trie = Trie::default();
+
+        let trie_filter = |key: &&String| {
+            key.contains(" ") || settings.single_tokens.keys().any(|&t| key.contains(t))
+        };
 
         keyword_trie.add(settings.keywords.keys().filter(trie_filter));
         keyword_trie.add(settings.comments.keys().filter(trie_filter));
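The rewritten constructor drops the intermediate Vec<String> copy of the single-token keys and borrows them directly inside the closure. A self-contained sketch of the filter, using std's HashMap and made-up keywords purely to show the borrow (not the crate's real data):

use std::collections::HashMap;

fn main() {
    let single_tokens: HashMap<char, u16> = HashMap::from([('(', 1), (')', 2)]);
    let keywords = vec!["GROUP BY".to_string(), "SELECT".to_string(), "(".to_string()];

    // Borrowing single_tokens.keys() inside the closure avoids building a
    // temporary Vec<String> of the keys every time the tokenizer is constructed.
    let trie_filter = |key: &&String| {
        key.contains(' ') || single_tokens.keys().any(|&t| key.contains(t))
    };

    let filtered: Vec<&str> = keywords.iter().filter(trie_filter).map(|s| s.as_str()).collect();
    assert_eq!(filtered, ["GROUP BY", "("]);
}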
@@ -114,7 +111,7 @@ impl<'a> TokenizerState<'a> {
 
     fn tokenize(&mut self) -> Result<Vec<Token>, TokenizerError> {
         self.scan(None)?;
-        Ok(std::mem::replace(&mut self.tokens, Vec::new()))
+        Ok(std::mem::take(&mut self.tokens))
     }
 
     fn scan(&mut self, until_peek_char: Option<char>) -> Result<(), TokenizerError> {
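std::mem::take is simply the Default-based shorthand for the replace call it supersedes here (and in the comment-flushing hunk below). A minimal sketch:

fn main() {
    let mut tokens = vec![1, 2, 3];

    // take(&mut v) == replace(&mut v, Default::default()):
    // the contents move out and an empty Vec is left behind.
    let finished = std::mem::take(&mut tokens);

    assert_eq!(finished, vec![1, 2, 3]);
    assert!(tokens.is_empty());
}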
@@ -146,7 +143,7 @@ impl<'a> TokenizerState<'a> {
         }
 
         if !self.settings.white_space.contains_key(&self.current_char) {
-            if self.current_char.is_digit(10) {
+            if self.current_char.is_ascii_digit() {
                 self.scan_number()?;
             } else if let Some(identifier_end) =
                 self.settings.identifiers.get(&self.current_char)
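The is_digit(10) to is_ascii_digit() substitutions here and in the hunks below are behaviour-preserving: char::to_digit(10), which backs is_digit, only ever recognises the ASCII digits 0-9, so the two calls agree for every char. A quick check:

fn main() {
    for c in ['7', 'x', '.', '٣'] {
        assert_eq!(c.is_ascii_digit(), c.is_digit(10));
    }
}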
@@ -205,7 +202,7 @@ impl<'a> TokenizerState<'a> {
     }
 
     fn char_at(&self, index: usize) -> Result<char, TokenizerError> {
-        self.sql.get(index).map(|c| *c).ok_or_else(|| {
+        self.sql.get(index).copied().ok_or_else(|| {
             self.error(format!(
                 "Index {} is out of bound (size {})",
                 index, self.size
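Option::copied (also used for the numeric-literal and keyword lookups further down) is the idiomatic spelling of .map(|x| *x) on an Option of a reference to a Copy value:

fn main() {
    let sql: Vec<char> = "SELECT 1".chars().collect();

    // get() returns Option<&char>; copied() turns it into Option<char>.
    let third = sql.get(2).copied();

    assert_eq!(third, Some('L'));
    assert_eq!(sql.get(99).copied(), None);
}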
@@ -237,7 +234,7 @@ impl<'a> TokenizerState<'a> {
             self.column,
             self.start,
             self.current - 1,
-            std::mem::replace(&mut self.comments, Vec::new()),
+            std::mem::take(&mut self.comments),
         ));
 
         // If we have either a semicolon or a begin token before the command's token, we'll parse
@@ -503,7 +500,7 @@ impl<'a> TokenizerState<'a> {
         let mut scientific = 0;
 
         loop {
-            if self.peek_char.is_digit(10) {
+            if self.peek_char.is_ascii_digit() {
                 self.advance(1)?;
             } else if self.peek_char == '.' && !decimal {
                 if self.tokens.last().map(|t| t.token_type) == Some(self.token_types.parameter) {
@@ -537,8 +534,7 @@ impl<'a> TokenizerState<'a> {
                     .numeric_literals
                     .get(&literal.to_uppercase())
                     .unwrap_or(&String::from("")),
-            )
-            .map(|x| *x);
+            ).copied();
 
             let replaced = literal.replace("_", "");
 
@@ -607,8 +603,7 @@ impl<'a> TokenizerState<'a> {
         } else {
             self.settings
                 .keywords
-                .get(&self.text().to_uppercase())
-                .map(|x| *x)
+                .get(&self.text().to_uppercase()).copied()
                 .unwrap_or(self.token_types.var)
         };
         self.add(token_type, None)
@@ -718,13 +713,13 @@ impl<'a> TokenizerState<'a> {
             if i == 0 {
                 self.is_alphabetic_or_underscore(c)
             } else {
-                self.is_alphabetic_or_underscore(c) || c.is_digit(10)
+                self.is_alphabetic_or_underscore(c) || c.is_ascii_digit()
             }
         })
     }
 
     fn is_numeric(&mut self, s: &str) -> bool {
-        s.chars().all(|c| c.is_digit(10))
+        s.chars().all(|c| c.is_ascii_digit())
     }
 
     fn extract_value(&mut self) -> Result<String, TokenizerError> {
@@ -1,6 +1,6 @@
-use std::collections::HashMap;
+use rustc_hash::FxHashMap as HashMap;
 
-#[derive(Debug)]
+#[derive(Debug, Default)]
 pub struct TrieNode {
     is_word: bool,
     children: HashMap<char, TrieNode>,
@@ -35,21 +35,12 @@ impl TrieNode {
     }
 }
 
-#[derive(Debug)]
+#[derive(Debug, Default)]
 pub struct Trie {
     pub root: TrieNode,
 }
 
 impl Trie {
-    pub fn new() -> Self {
-        Trie {
-            root: TrieNode {
-                is_word: false,
-                children: HashMap::new(),
-            },
-        }
-    }
-
     pub fn add<'a, I>(&mut self, keys: I)
     where
         I: Iterator<Item = &'a String>,
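Deriving Default on both TrieNode and Trie gives Trie::default() the same empty-root behaviour as the deleted hand-written new(), which is what lets Tokenizer::new above call Trie::default(). A compressed sketch of the pattern, assuming the rustc-hash crate added by this diff:

use rustc_hash::FxHashMap as HashMap;

#[derive(Debug, Default)]
pub struct TrieNode {
    is_word: bool,
    children: HashMap<char, TrieNode>,
}

#[derive(Debug, Default)]
pub struct Trie {
    pub root: TrieNode,
}

fn main() {
    // Equivalent to the removed Trie::new(): an empty, non-word root node.
    let trie = Trie::default();
    assert!(!trie.root.is_word);
    assert!(trie.root.children.is_empty());
}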
@@ -59,7 +50,7 @@ impl Trie {
         for c in key.chars() {
             current = current.children.entry(c).or_insert(TrieNode {
                 is_word: false,
-                children: HashMap::new(),
+                children: HashMap::default(),
             });
         }
         current.is_word = true;
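This last hunk has to switch from HashMap::new() to HashMap::default() because, with HashMap now aliased to FxHashMap, new() is no longer available: std only defines HashMap::new() for the default RandomState hasher. A minimal illustration (not from the diff):

use rustc_hash::FxHashMap;

fn main() {
    // FxHashMap is std's HashMap with the Fx build hasher, so it is
    // constructed via Default (or HashMap::with_hasher), not new().
    let mut children: FxHashMap<char, u32> = FxHashMap::default();
    children.insert('a', 1);
    assert_eq!(children.get(&'a'), Some(&1));
}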