Fix handling of comments

Comments are no longer tokenized. As a result, comments that fall inside a
run of tokens that gets transformed are dropped, while comments elsewhere are preserved.
This commit is contained in:
Kovid Goyal 2021-03-21 13:52:18 +05:30
parent dd2450e092
commit d21ef82385
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
2 changed files with 16 additions and 21 deletions

View File

@ -235,7 +235,6 @@ enum class TokenType : unsigned int {
function_start,
number,
dimension,
comment,
cdo,
cdc
};
@ -382,7 +381,6 @@ class Token {
bool is_significant() const {
switch(type) {
case TokenType::whitespace:
case TokenType::comment:
case TokenType::cdo:
case TokenType::cdc:
return false;
@ -404,7 +402,7 @@ class Token {
const char* type_name() const {
#define n(x) case TokenType::x: return #x;
switch(type) {
n(whitespace); n(comment); n(cdo); n(cdc); n(ident); n(string); n(number);
n(whitespace); n(cdo); n(cdc); n(ident); n(string); n(number);
n(function_start); n(dimension); n(url); n(delimiter); n(at_keyword); n(hash);
}
#undef n
@ -485,11 +483,6 @@ class Token {
case TokenType::dimension:
out.append(text);
break;
case TokenType::comment:
out.append({'/', '*'});
out.append(text);
out.append({'*', '/'});
break;
case TokenType::cdo:
out.append({'<', '!', '-', '-'});
break;
@ -670,10 +663,6 @@ class TokenQueue {
if (queue.empty() || !queue.back().is_type(TokenType::string)) new_token(TokenType::string);
}
void add_comment(const char32_t ch) {
new_token(TokenType::comment, ch);
}
void add_char(const char32_t ch) {
if (queue.empty()) throw std::logic_error("Attempting to add char to non-existent token");
queue.back().add_char(ch);
@ -940,11 +929,9 @@ class Parser {
// comment {{{
void enter_comment_mode() {
states.push(ParseState::comment);
token_queue.add_comment(ch);
}
void handle_comment() {
token_queue.add_char(ch);
if (ch == '/' && prev_ch == '*') pop_state();
} // }}}
@ -957,6 +944,7 @@ class Parser {
void handle_name() {
if (is_name(ch)) token_queue.add_char(ch);
else if (has_valid_escape()) enter_escape_mode();
else if (starting_comment()) enter_comment_mode();
else {
reconsume();
pop_state();
@ -976,6 +964,7 @@ class Parser {
void handle_number() {
if (is_digit(ch)) { token_queue.add_char(ch); return; }
if (ch == '.' && is_digit(peek())) { pop_state(); enter_digits_mode(); return; }
if (starting_comment()) { enter_comment_mode(); return; }
if ((ch == 'e' || ch == 'E')) {
char32_t next = peek();
if (is_digit(next) || ((next == '+' || next == '-') && is_digit(peek(1)))) {
@ -997,6 +986,7 @@ class Parser {
void handle_digits() {
if (is_digit(ch)) { token_queue.add_char(ch); }
else if (starting_comment()) enter_comment_mode();
else {
reconsume();
pop_state();
@ -1013,6 +1003,7 @@ class Parser {
// Process the current character `ch` while the parser is in the dimension state.
// NOTE(review): presumably this state reads the unit part of a dimension token
// (e.g. the "px" in "16px") -- inferred from the name, confirm against the caller.
void handle_dimension() {
// A name character is appended to the token at the back of the queue.
if (is_name(ch)) { token_queue.add_char(ch); return; }
// A valid escape sequence hands control to escape mode.
if (has_valid_escape()) { enter_escape_mode(); return; }
// A comment start enters comment mode without popping this state and without
// adding anything to the current token.
if (starting_comment()) { enter_comment_mode(); return; }
// Any other character ends this state: reconsume() is called, then the state is popped.
reconsume();
pop_state();
} // }}}
@ -1026,6 +1017,7 @@ class Parser {
void handle_ident() {
if (is_name(ch)) { token_queue.add_char(ch); return; }
if (has_valid_escape()) { enter_escape_mode(); return; }
if (starting_comment()) { enter_comment_mode(); return; }
pop_state();
if (ch == '(') {
if (token_queue.current_token_text_equals_case_insensitive("url")) enter_url_start_mode();
@ -1043,6 +1035,7 @@ class Parser {
if (is_whitespace(ch)) return;
if (starting_string()) { pop_state(); end_string_with = ch; states.push(ParseState::url_string); return; }
if (ch == ')') { pop_state(); return; }
if (starting_comment()) { enter_comment_mode(); return; }
pop_state(); states.push(ParseState::url);
token_queue.add_char(ch);
}
@ -1053,12 +1046,14 @@ class Parser {
}
// Process the current character `ch` while in the url-after-string state.
// NOTE(review): presumably this state follows the quoted string inside
// url("...") -- inferred from the name, confirm against the state transitions.
void handle_url_after_string() {
// A comment start enters comment mode; this state is not popped.
if (starting_comment()) { enter_comment_mode(); return; }
// Whitespace is ignored; any other character leaves url mode.
if (!is_whitespace(ch)) exit_url_mode();
}
// Process the current character `ch` while in the url state.
// The checks run in order: escape, closing paren, comment start, then plain character.
// NOTE(review): presumably this state covers the body of an unquoted url(...) --
// inferred from the name, confirm against the state transitions.
void handle_url() {
// A backslash that begins a valid escape hands control to escape mode.
if (ch == '\\' && has_valid_escape()) enter_escape_mode();
// A closing paren leaves url mode; the meaning of the `true` argument is not visible here.
else if (ch == ')') exit_url_mode(true);
// A comment start enters comment mode; nothing is added to the current token.
else if (starting_comment()) enter_comment_mode();
// Every other character is appended to the token at the back of the queue.
else token_queue.add_char(ch);
}

View File

@ -31,23 +31,23 @@ class TestTransform(SimpleTest):
def s(src, expected, url_callback=upper_case):
return d(src, expected, url_callback=url_callback, is_declaration=False)
s('@im/* c */port "x.y";', '@import "X.Y";')
s('@import url("narrow.css") supports(display: flex) handheld and (max-width: 400px);',
'@import url("NARROW.CSS") supports(display: flex) handheld and (max-width: 400px);')
s('@import "x.y";', '@import "X.Y";')
s('@import url( x.y);', '@import url("X.Y");')
u('background: url( te st.gif ) 12; src: url(x)', 'background: url("TE ST.GIF") 12; src: url("X")')
u('background: url(test.gif); xxx: url()', 'background: url("TEST.GIF"); xxx: url()')
u('background: url(te/**/st.gif); xxx: url()', 'background: url("TEST.GIF"); xxx: url()')
u(r'background: url(t\)est.gif)', 'background: url("T)EST.GIF")')
u('a:url( "( )" )', 'a:url("( )")')
u('a:url( "()" )', 'a:url( "()" )', url_callback=lambda x: x)
u('a:url( "( )" /**/ )', 'a:url("( )")')
u('a:url( "(/*)" )', 'a:url( "(/*)" )', url_callback=lambda x: x)
d(r'f\ont-s\69z\65 : 16\px', 'font-size: 1rem')
d('font -size: 16px', 'font -size: 16px')
d('font-size: 16px !important', 'font-size: 1rem !important')
d('font-/* */size: 1/*x*/6/**/p/**/x !important', 'font-size: 1rem !important')
d('fOnt-size :16px', 'fOnt-size :1rem')
d('font-size:2%', 'font-size:2%')
d('font-size: 72pt; margin: 20px; font-size: 2in', 'font-size: 6rem; margin: 20px; font-size: 12rem')
d('font-size: 72pt; margin: /*here*/ 20px; font-size: 2in', 'font-size: 6rem; margin: /*here*/ 20px; font-size: 12rem')
d(r'''font: "some 'name" 32px''', 'font: "some \'name" 2rem')
d(r'''font: 'some "name' 32px''', 'font: \'some "name\' 2rem')
d(r'''font: 'some \n ame' 32px''', 'font: "some n ame" 2rem')
@ -55,5 +55,5 @@ class TestTransform(SimpleTest):
d('font: sans-serif 16px/3', 'font: sans-serif 1rem/3')
d('font: sans-serif small/17', 'font: sans-serif 0.8rem/17')
d('-epub-writing-mode: a; -webkit-writing-mode: b; writing-mode: c', 'writing-mode: a; writing-mode: b; writing-mode: c')
d('-epub-writing-mode: a; -web/* */kit-writing-mode: b; writing-mode: c', 'writing-mode: a; writing-mode: b; writing-mode: c')
d('xxx:yyy', 'xxx:yyy')