Merging upstream version 25.5.1.

Signed-off-by: Daniel Baumann <daniel@debian.org>

parent 298e7a8147
commit 029b9c2c73

136 changed files with 80990 additions and 72541 deletions
@@ -76,6 +76,7 @@ pub struct TokenizerSettings {
     pub commands: HashSet<TokenType>,
     pub command_prefix_tokens: HashSet<TokenType>,
     pub heredoc_tag_is_identifier: bool,
+    pub string_escapes_allowed_in_raw_strings: bool,
 }
 
 #[pymethods]
@@ -98,6 +99,7 @@ impl TokenizerSettings {
         commands: HashSet<TokenType>,
         command_prefix_tokens: HashSet<TokenType>,
         heredoc_tag_is_identifier: bool,
+        string_escapes_allowed_in_raw_strings: bool,
     ) -> Self {
         let to_char = |v: &String| {
             if v.len() == 1 {
@@ -147,6 +149,7 @@ impl TokenizerSettings {
             commands,
             command_prefix_tokens,
             heredoc_tag_is_identifier,
+            string_escapes_allowed_in_raw_strings,
         }
     }
 }
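
The three hunks above are plumbing for a single new dialect knob: `string_escapes_allowed_in_raw_strings` is threaded through the `#[pyclass]` struct, the `#[pymethods]` constructor signature, and the struct initializer, so the Python side can pass the flag down to the Rust tokenizer. A minimal sketch of that pattern, heavily abbreviated to the one field (illustrative, not the real sqlglotrs definitions):

use pyo3::prelude::*;

#[pyclass]
pub struct TokenizerSettings {
    // ...many other fields elided...
    #[pyo3(get)]
    pub string_escapes_allowed_in_raw_strings: bool,
}

#[pymethods]
impl TokenizerSettings {
    #[new]
    fn new(string_escapes_allowed_in_raw_strings: bool) -> Self {
        TokenizerSettings { string_escapes_allowed_in_raw_strings }
    }
}

The flag itself only takes effect in extract_string's escape check, shown in the final hunk below.
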
@@ -361,10 +361,24 @@ impl<'a> TokenizerState<'a> {
         // Skip the comment's start delimiter.
         self.advance(comment_start_size as isize)?;
 
+        let mut comment_count = 1;
         let comment_end_size = comment_end.len();
 
-        while !self.is_end && self.chars(comment_end_size) != *comment_end {
+        while !self.is_end {
+            if self.chars(comment_end_size) == *comment_end {
+                comment_count -= 1;
+                if comment_count == 0 {
+                    break;
+                }
+            }
+
             self.advance(1)?;
+
+            // Nested comments are allowed by some dialects, e.g. databricks, duckdb, postgres
+            if !self.is_end && self.chars(comment_start_size) == *comment_start {
+                self.advance(comment_start_size as isize)?;
+                comment_count += 1
+            }
         }
 
         let text = self.text();
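
The rewritten loop above replaces "stop at the first end delimiter" with depth counting: comment_count starts at one, each nested start delimiter increments it, each end delimiter decrements it, and scanning stops only when the count returns to zero. A self-contained sketch of the same idea for /* ... */ comments (illustrative names, not the sqlglotrs state machine):

// Scan a block comment with nesting, as e.g. DuckDB and Postgres allow.
// Returns the byte offset just past the matching "*/", assuming `src`
// starts immediately after the opening "/*"; None if unterminated.
fn scan_nested_comment(src: &str) -> Option<usize> {
    let bytes = src.as_bytes();
    let mut depth = 1;
    let mut i = 0;
    while i < bytes.len() {
        if bytes[i..].starts_with(b"*/") {
            depth -= 1;
            i += 2;
            if depth == 0 {
                return Some(i);
            }
        } else if bytes[i..].starts_with(b"/*") {
            depth += 1;
            i += 2;
        } else {
            i += 1;
        }
    }
    None
}

fn main() {
    // Everything after the opening "/*", including one nested comment:
    let rest = " outer /* inner */ outer */ SELECT 1";
    let end = scan_nested_comment(rest).unwrap();
    assert_eq!(&rest[end..], " SELECT 1");
}
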
@@ -410,7 +424,7 @@ impl<'a> TokenizerState<'a> {
         let tag = if self.current_char.to_string() == *end {
             String::from("")
         } else {
-            self.extract_string(end, false, false, !self.settings.heredoc_tag_is_identifier)?
+            self.extract_string(end, false, true, !self.settings.heredoc_tag_is_identifier)?
         };
 
         if !tag.is_empty()
@@ -435,7 +449,7 @@ impl<'a> TokenizerState<'a> {
         };
 
         self.advance(start.len() as isize)?;
-        let text = self.extract_string(&end, false, token_type != self.token_types.raw_string, true)?;
+        let text = self.extract_string(&end, false, token_type == self.token_types.raw_string, true)?;
 
         if let Some(b) = base {
             if u64::from_str_radix(&text, b).is_err() {
@@ -581,7 +595,7 @@ impl<'a> TokenizerState<'a> {
 
     fn scan_identifier(&mut self, identifier_end: &str) -> Result<(), TokenizerError> {
         self.advance(1)?;
-        let text = self.extract_string(identifier_end, true, true, true)?;
+        let text = self.extract_string(identifier_end, true, false, true)?;
         self.add(self.token_types.identifier, Some(text))
     }
 
@@ -589,7 +603,7 @@ impl<'a> TokenizerState<'a> {
         &mut self,
         delimiter: &str,
         use_identifier_escapes: bool,
-        unescape_sequences: bool,
+        raw_string: bool,
         raise_unmatched: bool,
     ) -> Result<String, TokenizerError> {
         let mut text = String::from("");
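
This rename is the key to the three one-line flips earlier in the diff: extract_string's third argument now answers "is this a raw string?" rather than "should escape sequences be unescaped?", so every caller passes the negation of what it passed before:

- heredoc tags: false becomes true (the tag is scanned as raw text);
- string literals: token_type != raw_string becomes token_type == raw_string;
- quoted identifiers: true becomes false (identifiers are not raw strings; they keep their own escaping via use_identifier_escapes).

Passing the raw-string fact itself, instead of a pre-computed "unescape?" decision, is what lets the escape checks in the next two hunks also consult the new string_escapes_allowed_in_raw_strings setting.
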
@@ -602,7 +616,7 @@ impl<'a> TokenizerState<'a> {
         };
         let peek_char_str = self.peek_char.to_string();
 
-        if unescape_sequences
+        if !raw_string
             && !self.dialect_settings.unescaped_sequences.is_empty()
             && !self.peek_char.is_whitespace()
             && self.settings.string_escapes.contains(&self.current_char)
@@ -617,7 +631,8 @@ impl<'a> TokenizerState<'a> {
             }
         }
 
-        if escapes.contains(&self.current_char)
+        if (self.settings.string_escapes_allowed_in_raw_strings || !raw_string)
+            && escapes.contains(&self.current_char)
             && (peek_char_str == delimiter || escapes.contains(&self.peek_char))
             && (self.current_char == self.peek_char
                 || !self
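
Net effect of the last two hunks: escape processing inside a string is gated on the raw-string flag in two places. Unescaped-sequence substitution is skipped for raw strings outright (if !raw_string), and the escape-character branch fires for raw strings only when the dialect opts in via string_escapes_allowed_in_raw_strings. A condensed sketch of that gate (illustrative helper, not the sqlglotrs API):

use std::collections::HashSet;

// Condensed form of the condition added in the final hunk: in a raw
// string, an escape character is honored only if the dialect allows it.
fn escape_is_active(
    c: char,
    raw_string: bool,
    escapes: &HashSet<char>,
    escapes_allowed_in_raw_strings: bool, // dialect setting
) -> bool {
    (escapes_allowed_in_raw_strings || !raw_string) && escapes.contains(&c)
}

fn main() {
    let escapes: HashSet<char> = ['\\'].into_iter().collect();
    // Ordinary string: backslash escapes apply.
    assert!(escape_is_active('\\', false, &escapes, false));
    // Raw string in a typical dialect: backslash is literal.
    assert!(!escape_is_active('\\', true, &escapes, false));
    // Raw string in a dialect that allows escapes anyway.
    assert!(escape_is_active('\\', true, &escapes, true));
}
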