
Adding upstream version 25.5.1.

Signed-off-by: Daniel Baumann <daniel@debian.org>
Daniel Baumann 2025-02-13 21:41:00 +01:00
parent 147b6e06e8
commit 4e506fbac7
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
136 changed files with 80990 additions and 72541 deletions


@@ -361,10 +361,24 @@ impl<'a> TokenizerState<'a> {
             // Skip the comment's start delimiter.
             self.advance(comment_start_size as isize)?;
+            let mut comment_count = 1;
             let comment_end_size = comment_end.len();
-            while !self.is_end && self.chars(comment_end_size) != *comment_end {
+            while !self.is_end {
+                if self.chars(comment_end_size) == *comment_end {
+                    comment_count -= 1;
+                    if comment_count == 0 {
+                        break;
+                    }
+                }
                 self.advance(1)?;
+                // Nested comments are allowed by some dialects, e.g. databricks, duckdb, postgres
+                if !self.is_end && self.chars(comment_start_size) == *comment_start {
+                    self.advance(comment_start_size as isize)?;
+                    comment_count += 1
+                }
             }
             let text = self.text();
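For readers skimming the hunk above: the loop now tracks a nesting depth so that dialects which permit nested block comments (e.g. DuckDB and PostgreSQL) consume the whole /* outer /* inner */ outer */ span instead of stopping at the first terminator. A minimal standalone sketch of the same depth-counting idea over a byte slice, with hard-coded /* and */ delimiters rather than the dialect-driven ones above (illustrative only, not the sqlglotrs scanner API):

// Returns the byte length of the block comment starting at `input`,
// counting nested `/* ... */` pairs, or None if the comment never closes.
fn block_comment_len(input: &str) -> Option<usize> {
    let bytes = input.as_bytes();
    if !bytes.starts_with(b"/*") {
        return None;
    }
    let mut depth = 1usize; // the opener we just consumed
    let mut i = 2;
    while i < bytes.len() {
        if bytes[i..].starts_with(b"*/") {
            depth -= 1;
            i += 2;
            if depth == 0 {
                return Some(i);
            }
        } else if bytes[i..].starts_with(b"/*") {
            // A nested opener; without the counter this would be ignored
            // and the first `*/` would end the comment too early.
            depth += 1;
            i += 2;
        } else {
            i += 1;
        }
    }
    None
}

With this, block_comment_len("/* a /* b */ c */ SELECT 1") returns Some(17), i.e. the full nested span, whereas a non-counting scanner would report 12 and leave " c */ SELECT 1" to be tokenized as SQL.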
@@ -410,7 +424,7 @@ impl<'a> TokenizerState<'a> {
             let tag = if self.current_char.to_string() == *end {
                 String::from("")
             } else {
-                self.extract_string(end, false, false, !self.settings.heredoc_tag_is_identifier)?
+                self.extract_string(end, false, true, !self.settings.heredoc_tag_is_identifier)?
             };
             if !tag.is_empty()
@@ -435,7 +449,7 @@ impl<'a> TokenizerState<'a> {
         };
         self.advance(start.len() as isize)?;
-        let text = self.extract_string(&end, false, token_type != self.token_types.raw_string, true)?;
+        let text = self.extract_string(&end, false, token_type == self.token_types.raw_string, true)?;
         if let Some(b) = base {
             if u64::from_str_radix(&text, b).is_err() {
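The context lines above show the follow-up validation for bit/hex string literals: once the body has been extracted, u64::from_str_radix confirms that every digit is legal for the literal's base. A quick standalone illustration of that check (not tied to the tokenizer's types):

fn main() {
    // Contents of a hex string literal such as x'2F' must parse in base 16 ...
    assert!(u64::from_str_radix("2F", 16).is_ok());
    // ... while a digit outside the base makes the literal invalid.
    assert!(u64::from_str_radix("2G", 16).is_err());
    // The same check rejects a stray `2` inside a bit string like b'1012'.
    assert!(u64::from_str_radix("1012", 2).is_err());
}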
@@ -581,7 +595,7 @@ impl<'a> TokenizerState<'a> {
     fn scan_identifier(&mut self, identifier_end: &str) -> Result<(), TokenizerError> {
         self.advance(1)?;
-        let text = self.extract_string(identifier_end, true, true, true)?;
+        let text = self.extract_string(identifier_end, true, false, true)?;
         self.add(self.token_types.identifier, Some(text))
     }
@@ -589,7 +603,7 @@ impl<'a> TokenizerState<'a> {
         &mut self,
         delimiter: &str,
         use_identifier_escapes: bool,
-        unescape_sequences: bool,
+        raw_string: bool,
         raise_unmatched: bool,
     ) -> Result<String, TokenizerError> {
         let mut text = String::from("");
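The hunks from here on rename extract_string's third parameter from unescape_sequences to raw_string with the opposite polarity, which is why every call site above flips its third argument, and the last hunk additionally lets a dialect opt in to escape handling inside raw strings via string_escapes_allowed_in_raw_strings. A compressed sketch of that gating logic, using a hypothetical Settings struct rather than the real tokenizer settings:

struct Settings {
    // Hypothetical stand-in for the dialect flag referenced in the last hunk.
    string_escapes_allowed_in_raw_strings: bool,
}

// Escape characters are interpreted in ordinary strings, and in raw strings
// only when the dialect explicitly allows it.
fn escapes_active(settings: &Settings, raw_string: bool) -> bool {
    settings.string_escapes_allowed_in_raw_strings || !raw_string
}

fn main() {
    let default = Settings { string_escapes_allowed_in_raw_strings: false };
    assert!(escapes_active(&default, false)); // ordinary string: process escapes
    assert!(!escapes_active(&default, true)); // raw string: keep backslashes literal

    let lenient = Settings { string_escapes_allowed_in_raw_strings: true };
    assert!(escapes_active(&lenient, true)); // dialect opts in even for raw strings
}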
@@ -602,7 +616,7 @@ impl<'a> TokenizerState<'a> {
             };
             let peek_char_str = self.peek_char.to_string();
-            if unescape_sequences
+            if !raw_string
                 && !self.dialect_settings.unescaped_sequences.is_empty()
                 && !self.peek_char.is_whitespace()
                 && self.settings.string_escapes.contains(&self.current_char)
@@ -617,7 +631,8 @@ impl<'a> TokenizerState<'a> {
                 }
             }
-            if escapes.contains(&self.current_char)
+            if (self.settings.string_escapes_allowed_in_raw_strings || !raw_string)
+                && escapes.contains(&self.current_char)
                 && (peek_char_str == delimiter || escapes.contains(&self.peek_char))
                 && (self.current_char == self.peek_char
                     || !self