13 changes: 9 additions & 4 deletions crates/swc_ecma_parser/src/lexer/capturing.rs
@@ -1,5 +1,8 @@
use std::mem;

use swc_atoms::Atom;
use swc_common::BytePos;

use crate::{
error::Error,
input::Tokens,
@@ -116,10 +119,6 @@ impl<I: Tokens> Tokens for Capturing<I> {
self.inner.set_expr_allowed(allow);
}

fn set_next_regexp(&mut self, start: Option<swc_common::BytePos>) {
self.inner.set_next_regexp(start);
}

fn add_error(&mut self, error: Error) {
self.inner.add_error(error);
}
@@ -164,6 +163,12 @@ impl<I: Tokens> Tokens for Capturing<I> {
self.inner.set_token_value(token_value);
}

fn scan_regex(&mut self, start: BytePos) -> (TokenAndSpan, Option<(Atom, Atom)>) {
let result = self.inner.scan_regex(start);
self.capture(result.0);
result
}

fn scan_jsx_token(&mut self, allow_multiline_jsx_text: bool) -> TokenAndSpan {
self.inner.scan_jsx_token(allow_multiline_jsx_text)
}
8 changes: 3 additions & 5 deletions crates/swc_ecma_parser/src/lexer/mod.rs
@@ -1785,16 +1785,14 @@ impl<'a> Lexer<'a> {
}

/// Expects current char to be '/'
fn read_regexp(&mut self, start: BytePos) -> LexResult<Token> {
pub(crate) fn read_regexp(&mut self, start: BytePos) -> LexResult<(Atom, Atom)> {
unsafe {
// Safety: start is valid position, and cur() is Some('/')
self.input_mut().reset_to(start);
}

debug_assert_eq!(self.cur(), Some('/'));

let start = self.cur_pos();

self.bump(); // bump '/'

let slice_start = self.cur_pos();
@@ -1830,7 +1828,7 @@ self.bump();
self.bump();
}

let content = {
let exp = {
let s = unsafe { self.input_slice_to_cur(slice_start) };
self.atom(s)
};
@@ -1863,7 +1861,7 @@ impl<'a> Lexer<'a> {
}?
.unwrap_or_default();

Ok(Token::regexp(content, flags, self))
Ok((exp, flags))
}

/// This method is optimized for texts without escape sequences.
43 changes: 30 additions & 13 deletions crates/swc_ecma_parser/src/lexer/state.rs
@@ -1,6 +1,6 @@
use std::mem::take;

use swc_atoms::wtf8::CodePoint;
use swc_atoms::{wtf8::CodePoint, Atom};
use swc_common::BytePos;
use swc_ecma_ast::EsVersion;

@@ -35,7 +35,6 @@ pub struct State {
pub had_line_break_before_last: bool,
/// TODO: Remove this field.
is_first: bool,
pub next_regexp: Option<BytePos>,
pub start: BytePos,
pub prev_hi: BytePos,

@@ -111,11 +110,6 @@ impl crate::input::Tokens for Lexer<'_> {
#[inline]
fn set_expr_allowed(&mut self, _: bool) {}

#[inline]
fn set_next_regexp(&mut self, start: Option<BytePos>) {
self.state.next_regexp = start;
}

fn add_error(&mut self, error: Error) {
self.errors.push(error);
}
@@ -169,6 +163,35 @@ impl crate::input::Tokens for Lexer<'_> {
self.state.token_value.take()
}

fn scan_regex(&mut self, start: BytePos) -> (TokenAndSpan, Option<(Atom, Atom)>) {
let (token, ret) = match self.read_regexp(start) {
Ok(ret) => (Token::Regex, Some(ret)),
Err(error) => {
self.state.set_token_value(TokenValue::Error(error));
(Token::Error, None)
}
};

let span = self.span(start);
if token != Token::Eof {
if let Some(comments) = self.comments_buffer.as_mut() {
comments.pending_to_comment(BufferedCommentKind::Leading, start);
}

self.state.set_token_type(token);
self.state.prev_hi = self.last_pos();
self.state.had_line_break_before_last = self.had_line_break_before_last();
}

// Attach span to token.
let token = TokenAndSpan {
token,
had_line_break: self.had_line_break_before_last(),
span,
};
(token, ret)
}

fn rescan_jsx_token(&mut self, allow_multiline_jsx_text: bool, reset: BytePos) -> TokenAndSpan {
unsafe {
self.input.reset_to(reset);
@@ -373,11 +396,6 @@ impl crate::input::Tokens for Lexer<'_> {

impl Lexer<'_> {
fn next_token(&mut self, start: &mut BytePos) -> Result<Token, Error> {
if let Some(next_regexp) = self.state.next_regexp {
*start = next_regexp;
return self.read_regexp(next_regexp);
}

if self.state.is_first {
if let Some(shebang) = self.read_shebang()? {
self.state.set_token_value(TokenValue::Word(shebang));
@@ -593,7 +611,6 @@ impl State {
had_line_break: false,
had_line_break_before_last: false,
is_first: true,
next_regexp: None,
start: BytePos(0),
prev_hi: start_pos,
token_value: None,
24 changes: 1 addition & 23 deletions crates/swc_ecma_parser/src/lexer/token.rs
@@ -23,11 +23,6 @@ pub enum TokenValue {
value: Wtf8Atom,
raw: Atom,
},
// regexp
Regex {
value: Atom,
flags: Atom,
},
Num {
value: f64,
raw: Atom,
@@ -356,15 +351,6 @@ impl<'a> Token {
Token::Template
}

#[inline(always)]
pub fn regexp(content: Atom, flags: Atom, lexer: &mut crate::Lexer<'a>) -> Self {
lexer.set_token_value(Some(TokenValue::Regex {
value: content,
flags,
}));
Token::Regex
}

#[inline(always)]
pub fn num(value: f64, raw: Atom, lexer: &mut crate::Lexer<'a>) -> Self {
lexer.set_token_value(Some(TokenValue::Num { value, raw }));
@@ -457,11 +443,6 @@ impl<'a> Token {
(value.as_atom().cloned().unwrap(), raw)
}

#[inline(always)]
pub fn take_regexp<I: Tokens>(self, buffer: &mut Buffer<I>) -> (Atom, Atom) {
buffer.expect_regex_token_value()
}

#[inline(always)]
pub fn shebang(value: Atom, lexer: &mut Lexer) -> Self {
lexer.set_token_value(Some(TokenValue::Word(value)));
@@ -651,10 +632,7 @@ impl Token {
return format!("bigint literal ({value}, {raw})");
}
Token::Regex => {
let Some(TokenValue::Regex { value, flags, .. }) = value else {
unreachable!("{:#?}", value)
};
return format!("regexp literal ({value}, {flags})");
return "regexp literal".to_string();
}
Token::Template => {
let Some(TokenValue::Template { raw, .. }) = value else {
63 changes: 26 additions & 37 deletions crates/swc_ecma_parser/src/parser/expr.rs
@@ -340,11 +340,7 @@ impl<I: Tokens> Parser<I> {
return self.parse_lit().map(|lit| lit.into());
}
// Regexp
Token::Slash | Token::DivEq => {
if let Some(res) = self.try_parse_regexp(start) {
return Ok(res);
}
}
Token::Slash | Token::DivEq => return self.parse_regexp(start),
Token::LParen => return self.parse_paren_expr_or_arrow_fn(can_be_arrow, None),
Token::NoSubstitutionTemplateLiteral => {
return Ok(self.parse_no_substitution_template_literal(false)?.into())
@@ -2592,45 +2588,38 @@ impl<I: Tokens> Parser<I> {
}
}

fn try_parse_regexp(&mut self, start: BytePos) -> Option<Box<Expr>> {
fn parse_regexp(&mut self, start: BytePos) -> PResult<Box<Expr>> {
// Regexp
debug_assert!(self.input().cur() == Token::Slash || self.input().cur() == Token::DivEq);

self.input_mut().set_next_regexp(Some(start));

self.bump(); // `/` or `/=`

let cur = self.input().cur();
if cur == Token::Regex {
self.input_mut().set_next_regexp(None);
let (exp, flags) = self.input_mut().expect_regex_token_and_bump();
let span = self.span(start);

let mut flags_count =
flags
.chars()
.fold(FxHashMap::<char, usize>::default(), |mut map, flag| {
let key = match flag {
// https://tc39.es/ecma262/#sec-isvalidregularexpressionliteral
'd' | 'g' | 'i' | 'm' | 's' | 'u' | 'v' | 'y' => flag,
_ => '\u{0000}', // special marker for unknown flags
};
map.entry(key).and_modify(|count| *count += 1).or_insert(1);
map
});
let Some((exp, flags)) = self.input_mut().scan_regex(start) else {
let error = self.input_mut().expect_error_token_and_bump();
return Err(error);
};

if flags_count.remove(&'\u{0000}').is_some() {
self.emit_err(span, SyntaxError::UnknownRegExpFlags);
}
let span = self.span(start);
let mut flags_count =
flags
.chars()
.fold(FxHashMap::<char, usize>::default(), |mut map, flag| {
let key = match flag {
// https://tc39.es/ecma262/#sec-isvalidregularexpressionliteral
'd' | 'g' | 'i' | 'm' | 's' | 'u' | 'v' | 'y' => flag,
_ => '\u{0000}', // special marker for unknown flags
};
map.entry(key).and_modify(|count| *count += 1).or_insert(1);
map
});

if let Some((flag, _)) = flags_count.iter().find(|(_, count)| **count > 1) {
self.emit_err(span, SyntaxError::DuplicatedRegExpFlags(*flag));
}
if flags_count.remove(&'\u{0000}').is_some() {
self.emit_err(span, SyntaxError::UnknownRegExpFlags);
}

Some(Lit::Regex(Regex { span, exp, flags }).into())
} else {
None
if let Some((flag, _)) = flags_count.iter().find(|(_, count)| **count > 1) {
self.emit_err(span, SyntaxError::DuplicatedRegExpFlags(*flag));
}

Ok(Lit::Regex(Regex { span, exp, flags }).into())
}

fn try_parse_async_start(&mut self, can_be_arrow: bool) -> Option<PResult<Box<Expr>>> {
30 changes: 9 additions & 21 deletions crates/swc_ecma_parser/src/parser/input.rs
@@ -28,7 +28,6 @@ pub trait Tokens: Clone + Iterator<Item = TokenAndSpan> {
}

fn set_expr_allowed(&mut self, allow: bool);
fn set_next_regexp(&mut self, start: Option<BytePos>);

/// Implementors should use Rc<RefCell<Vec<Error>>>.
///
@@ -60,6 +59,7 @@ pub trait Tokens: Clone + Iterator<Item = TokenAndSpan> {
fn get_token_value(&self) -> Option<&TokenValue>;
fn set_token_value(&mut self, token_value: Option<TokenValue>);

fn scan_regex(&mut self, start: BytePos) -> (TokenAndSpan, Option<(Atom, Atom)>);
fn scan_jsx_token(&mut self, allow_multiline_jsx_text: bool) -> TokenAndSpan;
fn scan_jsx_open_el_terminal_token(&mut self) -> TokenAndSpan;
fn rescan_jsx_open_el_terminal_token(&mut self, reset: BytePos) -> TokenAndSpan;
@@ -120,14 +120,6 @@ impl<I: Tokens> Buffer<I> {
(value, raw)
}

pub fn expect_regex_token_value(&mut self) -> (Atom, Atom) {
let Some(crate::lexer::TokenValue::Regex { value, flags }) = self.iter.take_token_value()
else {
unreachable!()
};
(value, flags)
}

pub fn expect_template_token_value(&mut self) -> (LexResult<Wtf8Atom>, Atom) {
let Some(crate::lexer::TokenValue::Template { cooked, raw }) = self.iter.take_token_value()
else {
@@ -147,6 +139,14 @@ impl<I: Tokens> Buffer<I> {
self.iter.get_token_value()
}

pub(crate) fn scan_regex(&mut self, start: BytePos) -> Option<(Atom, Atom)> {
let prev = self.cur;
let (t, ret) = self.iter.scan_regex(start);
self.prev_span = prev.span;
self.set_cur(t);
ret
}

pub fn scan_jsx_token(&mut self, allow_multiline_jsx_text: bool) {
let prev = self.cur;
let t = self.iter.scan_jsx_token(allow_multiline_jsx_text);
@@ -346,13 +346,6 @@ impl<I: Tokens> Buffer<I> {
ret
}

pub fn expect_regex_token_and_bump(&mut self) -> (Atom, Atom) {
let cur = self.cur();
let ret = cur.take_regexp(self);
self.bump();
ret
}

pub fn expect_template_token_and_bump(&mut self) -> (LexResult<Wtf8Atom>, Atom) {
let cur = self.cur();
let ret = cur.take_template(self);
@@ -522,11 +515,6 @@ impl<I: Tokens> Buffer<I> {
self.iter_mut().set_expr_allowed(allow)
}

#[inline]
pub fn set_next_regexp(&mut self, start: Option<BytePos>) {
self.iter_mut().set_next_regexp(start);
}

#[inline]
pub fn end_pos(&self) -> BytePos {
self.iter().end_pos()