Fix handling of comments

Comments are no longer tokenized: the parser now skips over them instead of
emitting comment tokens. This means they are dropped from transformed token
runs, but preserved elsewhere.
This commit is contained in:
Kovid Goyal 2021-03-21 13:52:18 +05:30
parent dd2450e092
commit d21ef82385
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
2 changed files with 16 additions and 21 deletions

View File

@ -235,7 +235,6 @@ enum class TokenType : unsigned int {
function_start, function_start,
number, number,
dimension, dimension,
comment,
cdo, cdo,
cdc cdc
}; };
@ -382,7 +381,6 @@ class Token {
bool is_significant() const { bool is_significant() const {
switch(type) { switch(type) {
case TokenType::whitespace: case TokenType::whitespace:
case TokenType::comment:
case TokenType::cdo: case TokenType::cdo:
case TokenType::cdc: case TokenType::cdc:
return false; return false;
@ -404,7 +402,7 @@ class Token {
const char* type_name() const { const char* type_name() const {
#define n(x) case TokenType::x: return #x; #define n(x) case TokenType::x: return #x;
switch(type) { switch(type) {
n(whitespace); n(comment); n(cdo); n(cdc); n(ident); n(string); n(number); n(whitespace); n(cdo); n(cdc); n(ident); n(string); n(number);
n(function_start); n(dimension); n(url); n(delimiter); n(at_keyword); n(hash); n(function_start); n(dimension); n(url); n(delimiter); n(at_keyword); n(hash);
} }
#undef n #undef n
@ -485,11 +483,6 @@ class Token {
case TokenType::dimension: case TokenType::dimension:
out.append(text); out.append(text);
break; break;
case TokenType::comment:
out.append({'/', '*'});
out.append(text);
out.append({'*', '/'});
break;
case TokenType::cdo: case TokenType::cdo:
out.append({'<', '!', '-', '-'}); out.append({'<', '!', '-', '-'});
break; break;
@ -670,10 +663,6 @@ class TokenQueue {
if (queue.empty() || !queue.back().is_type(TokenType::string)) new_token(TokenType::string); if (queue.empty() || !queue.back().is_type(TokenType::string)) new_token(TokenType::string);
} }
void add_comment(const char32_t ch) {
new_token(TokenType::comment, ch);
}
void add_char(const char32_t ch) { void add_char(const char32_t ch) {
if (queue.empty()) throw std::logic_error("Attempting to add char to non-existent token"); if (queue.empty()) throw std::logic_error("Attempting to add char to non-existent token");
queue.back().add_char(ch); queue.back().add_char(ch);
@ -940,11 +929,9 @@ class Parser {
// comment {{{ // comment {{{
void enter_comment_mode() { void enter_comment_mode() {
states.push(ParseState::comment); states.push(ParseState::comment);
token_queue.add_comment(ch);
} }
void handle_comment() { void handle_comment() {
token_queue.add_char(ch);
if (ch == '/' && prev_ch == '*') pop_state(); if (ch == '/' && prev_ch == '*') pop_state();
} // }}} } // }}}
@ -957,6 +944,7 @@ class Parser {
void handle_name() { void handle_name() {
if (is_name(ch)) token_queue.add_char(ch); if (is_name(ch)) token_queue.add_char(ch);
else if (has_valid_escape()) enter_escape_mode(); else if (has_valid_escape()) enter_escape_mode();
else if (starting_comment()) enter_comment_mode();
else { else {
reconsume(); reconsume();
pop_state(); pop_state();
@ -976,6 +964,7 @@ class Parser {
void handle_number() { void handle_number() {
if (is_digit(ch)) { token_queue.add_char(ch); return; } if (is_digit(ch)) { token_queue.add_char(ch); return; }
if (ch == '.' && is_digit(peek())) { pop_state(); enter_digits_mode(); return; } if (ch == '.' && is_digit(peek())) { pop_state(); enter_digits_mode(); return; }
if (starting_comment()) { enter_comment_mode(); return; }
if ((ch == 'e' || ch == 'E')) { if ((ch == 'e' || ch == 'E')) {
char32_t next = peek(); char32_t next = peek();
if (is_digit(next) || ((next == '+' || next == '-') && is_digit(peek(1)))) { if (is_digit(next) || ((next == '+' || next == '-') && is_digit(peek(1)))) {
@ -997,6 +986,7 @@ class Parser {
void handle_digits() { void handle_digits() {
if (is_digit(ch)) { token_queue.add_char(ch); } if (is_digit(ch)) { token_queue.add_char(ch); }
else if (starting_comment()) enter_comment_mode();
else { else {
reconsume(); reconsume();
pop_state(); pop_state();
@ -1013,6 +1003,7 @@ class Parser {
void handle_dimension() { void handle_dimension() {
if (is_name(ch)) { token_queue.add_char(ch); return; } if (is_name(ch)) { token_queue.add_char(ch); return; }
if (has_valid_escape()) { enter_escape_mode(); return; } if (has_valid_escape()) { enter_escape_mode(); return; }
if (starting_comment()) { enter_comment_mode(); return; }
reconsume(); reconsume();
pop_state(); pop_state();
} // }}} } // }}}
@ -1026,6 +1017,7 @@ class Parser {
void handle_ident() { void handle_ident() {
if (is_name(ch)) { token_queue.add_char(ch); return; } if (is_name(ch)) { token_queue.add_char(ch); return; }
if (has_valid_escape()) { enter_escape_mode(); return; } if (has_valid_escape()) { enter_escape_mode(); return; }
if (starting_comment()) { enter_comment_mode(); return; }
pop_state(); pop_state();
if (ch == '(') { if (ch == '(') {
if (token_queue.current_token_text_equals_case_insensitive("url")) enter_url_start_mode(); if (token_queue.current_token_text_equals_case_insensitive("url")) enter_url_start_mode();
@ -1043,6 +1035,7 @@ class Parser {
if (is_whitespace(ch)) return; if (is_whitespace(ch)) return;
if (starting_string()) { pop_state(); end_string_with = ch; states.push(ParseState::url_string); return; } if (starting_string()) { pop_state(); end_string_with = ch; states.push(ParseState::url_string); return; }
if (ch == ')') { pop_state(); return; } if (ch == ')') { pop_state(); return; }
if (starting_comment()) { enter_comment_mode(); return; }
pop_state(); states.push(ParseState::url); pop_state(); states.push(ParseState::url);
token_queue.add_char(ch); token_queue.add_char(ch);
} }
@ -1053,12 +1046,14 @@ class Parser {
} }
void handle_url_after_string() { void handle_url_after_string() {
if (starting_comment()) { enter_comment_mode(); return; }
if (!is_whitespace(ch)) exit_url_mode(); if (!is_whitespace(ch)) exit_url_mode();
} }
void handle_url() { void handle_url() {
if (ch == '\\' && has_valid_escape()) enter_escape_mode(); if (ch == '\\' && has_valid_escape()) enter_escape_mode();
else if (ch == ')') exit_url_mode(true); else if (ch == ')') exit_url_mode(true);
else if (starting_comment()) enter_comment_mode();
else token_queue.add_char(ch); else token_queue.add_char(ch);
} }

View File

@ -31,23 +31,23 @@ class TestTransform(SimpleTest):
def s(src, expected, url_callback=upper_case): def s(src, expected, url_callback=upper_case):
return d(src, expected, url_callback=url_callback, is_declaration=False) return d(src, expected, url_callback=url_callback, is_declaration=False)
s('@im/* c */port "x.y";', '@import "X.Y";')
s('@import url("narrow.css") supports(display: flex) handheld and (max-width: 400px);', s('@import url("narrow.css") supports(display: flex) handheld and (max-width: 400px);',
'@import url("NARROW.CSS") supports(display: flex) handheld and (max-width: 400px);') '@import url("NARROW.CSS") supports(display: flex) handheld and (max-width: 400px);')
s('@import "x.y";', '@import "X.Y";')
s('@import url( x.y);', '@import url("X.Y");') s('@import url( x.y);', '@import url("X.Y");')
u('background: url( te st.gif ) 12; src: url(x)', 'background: url("TE ST.GIF") 12; src: url("X")') u('background: url( te st.gif ) 12; src: url(x)', 'background: url("TE ST.GIF") 12; src: url("X")')
u('background: url(test.gif); xxx: url()', 'background: url("TEST.GIF"); xxx: url()') u('background: url(te/**/st.gif); xxx: url()', 'background: url("TEST.GIF"); xxx: url()')
u(r'background: url(t\)est.gif)', 'background: url("T)EST.GIF")') u(r'background: url(t\)est.gif)', 'background: url("T)EST.GIF")')
u('a:url( "( )" )', 'a:url("( )")') u('a:url( "( )" /**/ )', 'a:url("( )")')
u('a:url( "()" )', 'a:url( "()" )', url_callback=lambda x: x) u('a:url( "(/*)" )', 'a:url( "(/*)" )', url_callback=lambda x: x)
d(r'f\ont-s\69z\65 : 16\px', 'font-size: 1rem') d(r'f\ont-s\69z\65 : 16\px', 'font-size: 1rem')
d('font -size: 16px', 'font -size: 16px') d('font -size: 16px', 'font -size: 16px')
d('font-size: 16px !important', 'font-size: 1rem !important') d('font-/* */size: 1/*x*/6/**/p/**/x !important', 'font-size: 1rem !important')
d('fOnt-size :16px', 'fOnt-size :1rem') d('fOnt-size :16px', 'fOnt-size :1rem')
d('font-size:2%', 'font-size:2%') d('font-size:2%', 'font-size:2%')
d('font-size: 72pt; margin: 20px; font-size: 2in', 'font-size: 6rem; margin: 20px; font-size: 12rem') d('font-size: 72pt; margin: /*here*/ 20px; font-size: 2in', 'font-size: 6rem; margin: /*here*/ 20px; font-size: 12rem')
d(r'''font: "some 'name" 32px''', 'font: "some \'name" 2rem') d(r'''font: "some 'name" 32px''', 'font: "some \'name" 2rem')
d(r'''font: 'some "name' 32px''', 'font: \'some "name\' 2rem') d(r'''font: 'some "name' 32px''', 'font: \'some "name\' 2rem')
d(r'''font: 'some \n ame' 32px''', 'font: "some n ame" 2rem') d(r'''font: 'some \n ame' 32px''', 'font: "some n ame" 2rem')
@ -55,5 +55,5 @@ class TestTransform(SimpleTest):
d('font: sans-serif 16px/3', 'font: sans-serif 1rem/3') d('font: sans-serif 16px/3', 'font: sans-serif 1rem/3')
d('font: sans-serif small/17', 'font: sans-serif 0.8rem/17') d('font: sans-serif small/17', 'font: sans-serif 0.8rem/17')
d('-epub-writing-mode: a; -webkit-writing-mode: b; writing-mode: c', 'writing-mode: a; writing-mode: b; writing-mode: c') d('-epub-writing-mode: a; -web/* */kit-writing-mode: b; writing-mode: c', 'writing-mode: a; writing-mode: b; writing-mode: c')
d('xxx:yyy', 'xxx:yyy') d('xxx:yyy', 'xxx:yyy')