Fix handling of comments

Comments are no longer tokenized. As a result, a comment that falls inside a run of tokens that gets transformed is dropped from the output, while comments everywhere else are preserved as-is.
2025-08-30 23:00:21 -04:00 · 2021-03-21 13:52:18 +05:30 · 2021-03-21 13:52:18 +05:30 · d21ef82385
commit d21ef82385
parent dd2450e092
2 changed files with 16 additions and 21 deletions
--- a/src/calibre/srv/fast_css_transform.cpp
+++ b/src/calibre/srv/fast_css_transform.cpp
@ -235,7 +235,6 @@ enum class TokenType : unsigned int {
    function_start,
    number,
    dimension,
-    comment,
    cdo,
    cdc
 };
@ -382,7 +381,6 @@ class Token {
 		bool is_significant() const {
 			switch(type) {
 				case TokenType::whitespace:
-				case TokenType::comment:
 				case TokenType::cdo:
 				case TokenType::cdc:
 					return false;
@ -404,7 +402,7 @@ class Token {
        const char* type_name() const {
 #define n(x) case TokenType::x: return #x;
            switch(type) {
-                n(whitespace); n(comment); n(cdo); n(cdc); n(ident); n(string); n(number);
+                n(whitespace); n(cdo); n(cdc); n(ident); n(string); n(number);
                n(function_start); n(dimension); n(url); n(delimiter); n(at_keyword); n(hash);
            }
 #undef n
@ -485,11 +483,6 @@ class Token {
                case TokenType::dimension:
                    out.append(text);
                    break;
-                case TokenType::comment:
-                    out.append({'/', '*'});
-                    out.append(text);
-                    out.append({'*', '/'});
-                    break;
                case TokenType::cdo:
                    out.append({'<', '!', '-', '-'});
                    break;
@ -670,10 +663,6 @@ class TokenQueue {
            if (queue.empty() || !queue.back().is_type(TokenType::string)) new_token(TokenType::string);
        }

-        void add_comment(const char32_t ch) {
-            new_token(TokenType::comment, ch);
-        }
-
        void add_char(const char32_t ch) {
            if (queue.empty()) throw std::logic_error("Attempting to add char to non-existent token");
            queue.back().add_char(ch);
@ -940,11 +929,9 @@ class Parser {
        // comment {{{
        void enter_comment_mode() {
            states.push(ParseState::comment);
-            token_queue.add_comment(ch);
        }

        void handle_comment() {
-            token_queue.add_char(ch);
            if (ch == '/' && prev_ch == '*') pop_state();
        } // }}}

@ -957,6 +944,7 @@ class Parser {
        void handle_name() {
            if (is_name(ch)) token_queue.add_char(ch);
            else if (has_valid_escape()) enter_escape_mode();
+            else if (starting_comment()) enter_comment_mode();
            else {
                reconsume();
                pop_state();
@ -976,6 +964,7 @@ class Parser {
        void handle_number() {
            if (is_digit(ch)) { token_queue.add_char(ch); return; }
            if (ch == '.' && is_digit(peek())) { pop_state(); enter_digits_mode(); return; }
+            if (starting_comment()) { enter_comment_mode(); return; }
            if ((ch == 'e' || ch == 'E')) {
                char32_t next = peek();
                if (is_digit(next) || ((next == '+' || next == '-') && is_digit(peek(1)))) {
@ -997,6 +986,7 @@ class Parser {

        void handle_digits() {
            if (is_digit(ch)) { token_queue.add_char(ch); }
+            else if (starting_comment()) enter_comment_mode();
            else {
                reconsume();
                pop_state();
@ -1013,6 +1003,7 @@ class Parser {
        void handle_dimension() {
            if (is_name(ch)) { token_queue.add_char(ch); return; }
            if (has_valid_escape()) { enter_escape_mode(); return; }
+            if (starting_comment()) { enter_comment_mode(); return; }
            reconsume();
            pop_state();
        } // }}}
@ -1026,6 +1017,7 @@ class Parser {
        void handle_ident() {
            if (is_name(ch)) { token_queue.add_char(ch); return; }
            if (has_valid_escape()) { enter_escape_mode(); return; }
+            if (starting_comment()) { enter_comment_mode(); return; }
            pop_state();
            if (ch == '(') {
                if (token_queue.current_token_text_equals_case_insensitive("url")) enter_url_start_mode();
@ -1043,6 +1035,7 @@ class Parser {
            if (is_whitespace(ch)) return;
            if (starting_string()) { pop_state(); end_string_with = ch; states.push(ParseState::url_string); return; }
            if (ch == ')') { pop_state(); return; }
+            if (starting_comment()) { enter_comment_mode(); return; }
            pop_state(); states.push(ParseState::url);
            token_queue.add_char(ch);
        }
@ -1053,12 +1046,14 @@ class Parser {
        }

        void handle_url_after_string() {
+            if (starting_comment()) { enter_comment_mode(); return; }
            if (!is_whitespace(ch)) exit_url_mode();
        }

        void handle_url() {
            if (ch == '\\' && has_valid_escape()) enter_escape_mode();
            else if (ch == ')') exit_url_mode(true);
+            else if (starting_comment()) enter_comment_mode();
            else token_queue.add_char(ch);
        }

--- a/src/calibre/srv/tests/fast_css_transform.py
+++ b/src/calibre/srv/tests/fast_css_transform.py
@ -31,23 +31,23 @@ class TestTransform(SimpleTest):
        def s(src, expected, url_callback=upper_case):
            return d(src, expected, url_callback=url_callback, is_declaration=False)

+        s('@im/* c */port "x.y";', '@import "X.Y";')
        s('@import url("narrow.css") supports(display: flex) handheld and (max-width: 400px);',
          '@import url("NARROW.CSS") supports(display: flex) handheld and (max-width: 400px);')
-        s('@import "x.y";', '@import "X.Y";')
        s('@import url( x.y);', '@import url("X.Y");')

        u('background: url(  te  st.gif  ) 12; src: url(x)', 'background: url("TE  ST.GIF") 12; src: url("X")')
-        u('background: url(test.gif); xxx: url()', 'background: url("TEST.GIF"); xxx: url()')
+        u('background: url(te/**/st.gif); xxx: url()', 'background: url("TEST.GIF"); xxx: url()')
        u(r'background: url(t\)est.gif)', 'background: url("T)EST.GIF")')
-        u('a:url(  "( )"  )', 'a:url("( )")')
-        u('a:url(  "()"  )', 'a:url(  "()"  )', url_callback=lambda x: x)
+        u('a:url(  "( )" /**/ )', 'a:url("( )")')
+        u('a:url(  "(/*)"  )', 'a:url(  "(/*)"  )', url_callback=lambda x: x)

        d(r'f\ont-s\69z\65 : 16\px', 'font-size: 1rem')
        d('font -size: 16px', 'font -size: 16px')
-        d('font-size: 16px !important', 'font-size: 1rem !important')
+        d('font-/* */size: 1/*x*/6/**/p/**/x !important', 'font-size: 1rem !important')
        d('fOnt-size :16px', 'fOnt-size :1rem')
        d('font-size:2%', 'font-size:2%')
-        d('font-size: 72pt; margin: 20px; font-size: 2in', 'font-size: 6rem; margin: 20px; font-size: 12rem')
+        d('font-size: 72pt; margin: /*here*/ 20px; font-size: 2in', 'font-size: 6rem; margin: /*here*/ 20px; font-size: 12rem')
        d(r'''font: "some 'name" 32px''', 'font: "some \'name" 2rem')
        d(r'''font: 'some "name' 32px''', 'font: \'some "name\' 2rem')
        d(r'''font: 'some \n ame' 32px''', 'font: "some n ame" 2rem')
@ -55,5 +55,5 @@ class TestTransform(SimpleTest):
        d('font: sans-serif 16px/3', 'font: sans-serif 1rem/3')
        d('font: sans-serif small/17', 'font: sans-serif 0.8rem/17')

-        d('-epub-writing-mode: a; -webkit-writing-mode: b; writing-mode: c', 'writing-mode: a; writing-mode: b; writing-mode: c')
+        d('-epub-writing-mode: a; -web/* */kit-writing-mode: b; writing-mode: c', 'writing-mode: a; writing-mode: b; writing-mode: c')
        d('xxx:yyy', 'xxx:yyy')