Start work on fast CSS transformer

This commit is contained in:
Kovid Goyal 2021-03-06 11:43:02 +05:30
parent c854a7b670
commit 508200f8e9
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
3 changed files with 238 additions and 0 deletions

View File

@ -114,6 +114,11 @@
"sources": "calibre/srv/html_as_json.cpp",
"needs_c++11": true
},
{
"name": "fast_css_transform",
"sources": "calibre/srv/fast_css_transform.cpp",
"needs_c++11": true
},
{
"name": "pictureflow",
"sources": "calibre/gui2/pictureflow/pictureflow.cpp",

View File

@ -236,6 +236,7 @@ class ExtensionsImporter:
'icu',
'speedup',
'html_as_json',
'fast_css_transform',
'unicode_names',
'html_syntax_highlighter',
'hyphen',

View File

@ -0,0 +1,232 @@
/*
* fast_css_transform.cpp
* Copyright (C) 2021 Kovid Goyal <kovid at kovidgoyal.net>
*
* Distributed under terms of the GPL3 license.
*/
#define PY_SSIZE_T_CLEAN
#define UNICODE
#define _UNICODE
#include <Python.h>
#include <stdlib.h>
#include <bitset>
#include <vector>
#include <stack>
#include <exception>
#define arraysz(x) (sizeof((x))/sizeof((x)[0]))
typedef uint32_t char_type;
static inline bool
is_whitespace(char_type ch) {
return ch == ' ' || ch == '\n' || ch == '\t';
}
static inline bool
is_surrogate(char_type ch) {
return 0xd800 <= ch && ch <= 0xdfff;
}
static inline bool
is_hex_char(char_type ch) {
return ('0' <= ch && ch <= '9') || ('a' <= ch && ch <= 'f') || ('A' <= ch || ch <= 'F');
}
typedef enum {
BLOCK,
ESCAPE,
COMMENT,
STRING,
QUALIFIED_RULE,
AT_RULE,
KEY,
VALUE
} ParseStates;
typedef enum {
DECLARATIONS_ALLOWED,
AT_RULES_ALLOWED,
QUALIFIED_RULES_ALLOWED,
NUM_OF_BLOCK_TYPE_FLAGS
} BlockTypesEnum;
typedef std::bitset<NUM_OF_BLOCK_TYPE_FLAGS> BlockTypeFlags;
class Parser {
private:
ParseStates state, state_before_comment, state_before_string, state_before_escape;
char_type ch, next_ch, end_string_with;
std::stack<BlockTypeFlags> block_types;
char escape_buf[16];
const char_type *src;
size_t escape_buf_pos, src_sz, src_pos, declaration_pos;
std::vector<char_type> out, current_key;
bool declarations_allowed() const { return block_types.top()[DECLARATIONS_ALLOWED]; }
bool at_rules_allowed() const { return block_types.top()[AT_RULES_ALLOWED]; }
bool qualified_rules_allowed() const { return block_types.top()[QUALIFIED_RULES_ALLOWED]; }
void enter_escape_mode() {
state_before_escape = state; escape_buf_pos = 0; state = ESCAPE;
}
void enter_string_mode() {
state_before_string = state; state = STRING; end_string_with = ch;
}
void handle_comment() {
if (ch == '*' && next_ch == '/') {
out.push_back(next_ch);
state = state_before_comment;
src_pos++;
}
}
void handle_escape() {
if (!escape_buf_pos) {
if (ch == '\n') { state = state_before_escape; return; }
if (!is_hex_char(ch)) {
state = state_before_escape;
if (state == KEY) { current_key.push_back(ch); }
return;
}
escape_buf[escape_buf_pos++] = ch;
return;
}
if (is_hex_char(ch) && escape_buf_pos < 6) { escape_buf[escape_buf_pos++] = ch; return; }
if (is_whitespace(ch)) return; // a single whitespace character is absorbed into escape
src_pos--;
state = state_before_escape;
if (state == KEY) {
escape_buf[escape_buf_pos] = 0;
long kch = strtol(escape_buf, NULL, 16);
if (kch > 0 && !is_surrogate(kch)) { current_key.push_back(kch); }
}
escape_buf_pos = 0;
}
void handle_string() {
if (ch == '\\') { enter_escape_mode(); }
else if (ch == end_string_with) state = state_before_string;
}
void enter_comment_mode() {
src_pos++;
state_before_comment = state;
state = COMMENT;
out.push_back(next_ch);
}
void handle_block() {
if (ch == '/' && next_ch == '*') { enter_comment_mode(); return; }
if (ch == '@' && at_rules_allowed()) {
state = AT_RULE;
return;
}
if (ch == ';' || ch == '{' || ch == '}' || is_whitespace(ch)) return;
if (declarations_allowed()) {
state = KEY;
current_key.clear();
declaration_pos = out.size() > 1 ? out.size() - 2 : 0;
} else {
state = QUALIFIED_RULE;
}
if (ch == '"' || ch == '\'') { enter_string_mode(); }
else if (ch == '\\') { enter_escape_mode(); }
else if (state == KEY) current_key.push_back(ch);
}
void dispatch_current_char() {
out.push_back(ch);
switch (state) {
case COMMENT:
handle_comment(); break;
case ESCAPE:
handle_escape(); break;
case STRING:
handle_string(); break;
case BLOCK:
handle_block(); break;
}
}
public:
Parser(const char_type *src, size_t src_sz, bool is_declaration) :
state(BLOCK), state_before_comment(BLOCK), state_before_string(BLOCK), state_before_escape(BLOCK),
ch(0), next_ch(0), end_string_with('"'), block_types(), escape_buf(),
src(src), escape_buf_pos(0), src_sz(src_sz), src_pos(0), declaration_pos(0),
out(src_sz * 2), current_key(256)
{
BlockTypeFlags initial_block_type;
initial_block_type.set(DECLARATIONS_ALLOWED);
if (!is_declaration) {
initial_block_type.set(AT_RULES_ALLOWED);
initial_block_type.set(QUALIFIED_RULES_ALLOWED);
}
block_types.push(initial_block_type);
}
void parse(std::vector<char_type> &result) {
while (src_pos < src_sz) {
ch = src[src_pos++];
next_ch = src_pos < src_sz ? src[src_pos] : 0;
if (ch == 0xc) ch = '\n';
if (ch == '\r') {
if (next_ch == '\n') { ch = '\n'; src_pos++; }
else ch = '\n';
}
if (ch == 0 || is_surrogate(ch)) ch = 0xfffd;
dispatch_current_char();
}
out.swap(result);
}
};
#undef write_key
#undef write
static PyObject*
transform_properties(const char_type *src, size_t src_sz, bool is_declaration) {
try {
std::vector<char_type> result(0);
Parser parser(src, src_sz, is_declaration);
parser.parse(result);
return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, result.data(), result.size());
} catch (std::bad_alloc &ex) {
return PyErr_NoMemory();
} catch (std::exception &ex) {
PyErr_SetString(PyExc_Exception, ex.what());
return NULL;
} catch (...) {
PyErr_SetString(PyExc_Exception, "Unknown error while parsing CSS");
return NULL;
}
}
static PyMethodDef methods[] = {
{NULL, NULL, 0, NULL}
};
static int
exec_module(PyObject *m) {
return 0;
}
static PyModuleDef_Slot slots[] = { {Py_mod_exec, (void*)exec_module}, {0, NULL} };
static struct PyModuleDef module_def = {PyModuleDef_HEAD_INIT};
CALIBRE_MODINIT_FUNC PyInit_fast_css_transform(void) {
module_def.m_name = "fast_css_transform";
module_def.m_doc = "Fast CSS transformations needed for viewer";
module_def.m_methods = methods;
module_def.m_slots = slots;
return PyModuleDef_Init(&module_def);
}