diff --git a/setup/extensions.json b/setup/extensions.json index 6e6ee02c9f..4900ff5b88 100644 --- a/setup/extensions.json +++ b/setup/extensions.json @@ -117,7 +117,8 @@ { "name": "fast_css_transform", "sources": "calibre/srv/fast_css_transform.cpp", - "needs_c++11": true + "inc_dirs": "perfect-hashing", + "needs_c++14": true }, { "name": "pictureflow", diff --git a/src/calibre/srv/fast_css_transform.cpp b/src/calibre/srv/fast_css_transform.cpp index 1828e0197c..1a48054094 100644 --- a/src/calibre/srv/fast_css_transform.cpp +++ b/src/calibre/srv/fast_css_transform.cpp @@ -17,6 +17,8 @@ #include #include #include +#include +#include // character classes {{{ static inline bool @@ -53,6 +55,12 @@ static inline bool is_name(char32_t ch) { return is_name_start(ch) || is_digit(ch) || ch == '-'; } + +static inline bool +is_printable_ascii(char32_t ch) { + return ch >= ' ' && ch <= '~'; +} + // }}} class python_error : public std::runtime_error { @@ -79,6 +87,129 @@ class pyobject_raii { PyObject *detach() { PyObject *ans = handle; handle = NULL; return ans; } }; +typedef long long integer_type; + +class ParsedNumber { + public: + bool is_integer; + integer_type integer_value; + double float_value; + ParsedNumber(integer_type val) : is_integer(true), integer_value(val), float_value(0) {} + ParsedNumber(double val) : is_integer(false), integer_value(0), float_value(val) {} +}; + +static const double base_font_size = 16.0, dpi = 96.0, pt_to_px = dpi / 72.0, pt_to_rem = pt_to_px / base_font_size; + +static double +convert_font_size(double val, double factor) { + return (factor == 0.0) ? 
val / base_font_size : (val * factor * pt_to_rem); +} + +static integer_type +ipow(integer_type base, integer_type exp) { + integer_type result = 1; + while(true) { + if (exp & 1) result *= base; + exp >>= 1; + if (!exp) break; + base *= base; + } + return result; +} + +template +static integer_type +parse_integer(const T &src, const size_t first, size_t last) { + integer_type ans = 0, base = 1; + while(true) { + integer_type digit = src[last] - '0'; + ans += digit * base; + if (last == first) break; + last--; + base *= 10; + } + return ans; +} + +template +static ParsedNumber +parse_css_number(const T &src) { + int sign = 1, exponent_sign = 1; + integer_type integer_part = 0, fractional_part = 0, exponent_part = 0; + unsigned num_of_fractional_digits = 0; + size_t first_digit = 0, last_digit = 0; + const size_t src_sz = src.size(); + size_t pos = 0; +#define read_sign(which) { if (pos < src_sz && (src[pos] == '+' || src[pos] == '-')) { if (src[pos++] == '-') which = -1; }} +#define read_integer(which) { \ + if (pos < src_sz && is_digit(src[pos])) { \ + first_digit = pos; \ + while (pos + 1 < src_sz && is_digit(src[pos+1])) pos++; \ + last_digit = pos++; \ + which = parse_integer(src, first_digit, last_digit); \ + }} + read_sign(sign); + read_integer(integer_part); + if (pos < src_sz && src[pos] == '.') { + pos++; + read_integer(fractional_part); + if (fractional_part) num_of_fractional_digits = last_digit - first_digit + 1; + } + if (pos < src_sz && (src[pos] == 'e' || src[pos] == 'E')) { + pos++; + read_sign(exponent_sign); + read_integer(exponent_part); + } + if (fractional_part || (exponent_part && exponent_sign == -1)) { + double ans = integer_part; + if (fractional_part) ans += ((double) fractional_part) / ((double)(ipow(10, num_of_fractional_digits))); + if (exponent_part) { + if (exponent_sign == -1) ans /= (double)ipow(10, exponent_part); + else ans *= ipow(10, exponent_part); + } + return ParsedNumber(sign * ans); + } + return ParsedNumber(sign * 
integer_part * ipow(10, exponent_part)); +#undef read_sign +#undef read_integer +} + +enum class PropertyType : unsigned int { + font_size, page_break, non_standard_writing_mode +}; + +constexpr auto known_properties = frozen::make_unordered_map({ + {"font-size", PropertyType::font_size}, + {"font", PropertyType::font_size}, + + {"page-break-before", PropertyType::page_break}, + {"page-break-after", PropertyType::page_break}, + {"page-break-inside", PropertyType::page_break}, + + {"-webkit-writing-mode", PropertyType::non_standard_writing_mode}, + {"-epub-writing-mode", PropertyType::non_standard_writing_mode}, +}); + +constexpr auto font_size_keywords = frozen::make_unordered_map({ + {"xx-small", "0.5rem"}, + {"x-small", "0.625rem"}, + {"small", "0.8rem"}, + {"medium", "1rem"}, + {"large", "1.125rem"}, + {"x-large", "1.5rem"}, + {"xx-large", "2rem"}, + {"xxx-large", "2.55rem"} +}); + +constexpr auto absolute_length_units = frozen::make_unordered_map({ + {"mm", 2.8346456693}, + {"cm", 28.346456693}, + {"in", 72}, + {"pc", 12}, + {"q", 0.708661417325}, + {"px", 0.0}, + {"pt", 1.0} +}); enum class TokenType : unsigned int { @@ -97,7 +228,7 @@ enum class TokenType : unsigned int { comment, cdo, cdc -} TokenTypes; +}; class Token { @@ -143,6 +274,7 @@ class Token { text.clear(); unit_at = 0; out_pos = 0; type = TokenType::whitespace; } + TokenType get_type() const { return type; } void set_type(const TokenType q) { type = q; } void set_output_position(const size_t val) { out_pos = val; } bool is_type(const TokenType q) const { return type == q; } @@ -163,6 +295,17 @@ class Token { return true; } + bool text_as_ascii_lowercase(std::string &scratch) { + scratch.clear(); + for (auto ch : text) { + if (is_printable_ascii(ch)) { + if ('A' <= ch && ch <= 'Z') ch += 'a' - 'A'; + scratch.push_back(ch); + } else return false; + } + return true; + } + bool is_keyword_case_insensitive(const char *lowercase_text) const { return type == TokenType::ident && 
text_equals_case_insensitive(lowercase_text); } @@ -183,18 +326,37 @@ class Token { } } - PyObject* text_as_python_string() const { + PyObject* get_text() const { PyObject *ans = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, text.data(), text.size()); if (ans == NULL) throw python_error("Failed to convert token value to python unicode object"); return ans; } - void set_text_from_python_string(const PyObject* src) { + void erase_text_substring(size_t pos, size_t len) { + text.replace(pos, len, (size_t)0u, 0); + } + + void set_text(const PyObject* src) { if (PyUnicode_READY(src) != 0) throw python_error("Failed to set token value from unicode object as readying the unicode obect failed"); text.clear(); int kind = PyUnicode_KIND(src); void *data = PyUnicode_DATA(src); for (Py_ssize_t i = 0; i < PyUnicode_GET_LENGTH(src); i++) text.push_back(PyUnicode_READ(kind, data, i)); } + + void set_text(const char* src) { + text.clear(); + while(*src) text.push_back(*(src++)); + } + + void set_text(const frozen::string &src) { + text.clear(); + for (size_t i = 0; i < src.size(); i++) text.push_back(src[i]); + } + + bool parse_dimension(std::string &scratch) { + return text_as_ascii_lowercase(scratch); /* stub: only lowercases the text into scratch for now; returns false for non printable-ASCII text and true otherwise, so every path returns a value (flowing off the end of a non-void function is undefined behaviour) */ + } + }; class TokenQueue { @@ -202,6 +364,7 @@ class TokenQueue { std::stack pool; std::vector queue; std::u32string out; + std::string scratch, scratch2; pyobject_raii url_callback; void new_token(const TokenType type, const char32_t ch = 0) { @@ -252,12 +415,12 @@ class TokenQueue { if (url_callback) { for (auto& tok : queue) { if (tok.is_type(type)) { - pyobject_raii url(tok.text_as_python_string()); + pyobject_raii url(tok.get_text()); pyobject_raii new_url(PyObject_CallFunctionObjArgs(url_callback.ptr(), url.ptr(), NULL)); if (!new_url) { PyErr_Print(); } else { if (PyUnicode_Check(new_url.ptr()) && new_url.ptr() != url.ptr()) { - tok.set_text_from_python_string(new_url.ptr()); + tok.set_text(new_url.ptr()); changed = true; } } @@ -269,10 +432,10 @@ class 
TokenQueue { bool process_declaration() { bool changed = false; - bool colon_found = false, key_found = false; + bool colon_found = false, key_found = false, keep_going = true; std::function::iterator)> process_values; - for (auto it = queue.begin(); it < queue.end(); it++) { + for (auto it = queue.begin(); keep_going && it < queue.end(); it++) { if (!it->is_significant()) continue; if (key_found) { if (colon_found) { @@ -285,8 +448,22 @@ class TokenQueue { } else { if (it->is_type(TokenType::ident)) { key_found = true; - if (it->text_equals_case_insensitive("font") || it->text_equals_case_insensitive("font-size")) { - process_values = std::bind(&TokenQueue::process_font_sizes, this, std::placeholders::_1); + if (!it->text_as_ascii_lowercase(scratch)) break; // not a printable ascii property name + frozen::string property_name(scratch.data(), scratch.size()); + auto pit = known_properties.find(property_name); + if (pit == known_properties.end()) break; // not a known property + switch(pit->second) { + case PropertyType::font_size: + process_values = std::bind(&TokenQueue::process_font_sizes, this, std::placeholders::_1); + break; + case PropertyType::page_break: + it->erase_text_substring(0, 5); + changed = true; keep_going = false; + break; + case PropertyType::non_standard_writing_mode: + it->set_text("writing-mode"); + changed = true; keep_going = false; + break; } } else break; // no property key found } @@ -294,13 +471,34 @@ class TokenQueue { return changed; } - bool process_font_sizes(std::vector::iterator) { + bool process_font_sizes(std::vector::iterator it) { bool changed = false; + for (; it < queue.end(); it++) { + switch (it->get_type()) { + case TokenType::ident: + if (it->text_as_ascii_lowercase(scratch2)) { + frozen::string key(scratch2.data(), scratch2.size()); + auto fsm = font_size_keywords.find(key); + if (fsm != font_size_keywords.end()) { + it->set_text(fsm->second); + changed = true; + } + } + break; + case TokenType::dimension: + break; + 
default: + break; + } + } return changed; } public: - TokenQueue(const size_t src_sz, PyObject *url_callback=NULL) : pool(), queue(), out(), url_callback(url_callback) { out.reserve(src_sz * 2); } + TokenQueue(const size_t src_sz, PyObject *url_callback=NULL) : + pool(), queue(), out(), scratch(), scratch2(), url_callback(url_callback) { + out.reserve(src_sz * 2); scratch.reserve(16); scratch2.reserve(16); + } void rewind_output() { out.pop_back(); } @@ -820,6 +1018,20 @@ class Parser { }; +#define handle_exceptions(msg) \ + catch (std::bad_alloc &ex) { \ + return PyErr_NoMemory(); \ + } catch (python_error &ex) { \ + return NULL; \ + } catch (std::exception &ex) { \ + PyErr_SetString(PyExc_Exception, ex.what()); \ + return NULL; \ + } catch (...) { \ + PyErr_SetString(PyExc_Exception, msg); \ + return NULL; \ + } + + static PyObject* transform_properties(const char32_t *src, size_t src_sz, bool is_declaration) { try { @@ -827,21 +1039,30 @@ transform_properties(const char32_t *src, size_t src_sz, bool is_declaration) { Parser parser(src, src_sz, is_declaration); parser.parse(result); return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, result.data(), result.size()); - } catch (std::bad_alloc &ex) { - return PyErr_NoMemory(); - } catch (python_error &ex) { - return NULL; - } catch (std::exception &ex) { - PyErr_SetString(PyExc_Exception, ex.what()); - return NULL; - } catch (...) 
{ - PyErr_SetString(PyExc_Exception, "Unknown error while parsing CSS"); - return NULL; - } + } handle_exceptions("Unknown error while parsing CSS"); } +static PyObject* +parse_css_number_python(PyObject *self, PyObject *src) { + if (!PyUnicode_Check(src)) { PyErr_SetString(PyExc_TypeError, "Unicode string required"); return NULL; } + if (PyUnicode_READY(src) != 0) { return NULL; } + try { + std::u32string text; + text.reserve(PyUnicode_GET_LENGTH(src)); + int kind = PyUnicode_KIND(src); void *data = PyUnicode_DATA(src); + for (Py_ssize_t i = 0; i < PyUnicode_GET_LENGTH(src); i++) text.push_back(PyUnicode_READ(kind, data, i)); + ParsedNumber ans = parse_css_number(text); + if (ans.is_integer) return PyLong_FromLongLong(ans.integer_value); + return PyFloat_FromDouble(ans.float_value); + } handle_exceptions("Unknown error while parsing CSS number"); +} + +#undef handle_exceptions static PyMethodDef methods[] = { + {"parse_css_number", parse_css_number_python, METH_O, /* the handler rejects any non-unicode argument with TypeError */ + "Parse a CSS number from a string" + }, {NULL, NULL, 0, NULL} }; diff --git a/src/calibre/srv/tests/base.py b/src/calibre/srv/tests/base.py index a2a7ca670b..865f139c5e 100644 --- a/src/calibre/srv/tests/base.py +++ b/src/calibre/srv/tests/base.py @@ -17,13 +17,16 @@ from polyglot import http_client rmtree = partial(shutil.rmtree, ignore_errors=True) -class BaseTest(unittest.TestCase): +class SimpleTest(unittest.TestCase): longMessage = True maxDiff = None ae = unittest.TestCase.assertEqual + +class BaseTest(SimpleTest): + def run(self, result=None): # we retry failing server tests since they are flaky on CI if result is None: diff --git a/src/calibre/srv/tests/fast_css_transform.py b/src/calibre/srv/tests/fast_css_transform.py new file mode 100644 index 0000000000..d2c9158099 --- /dev/null +++ b/src/calibre/srv/tests/fast_css_transform.py @@ -0,0 +1,19 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 +# License: GPL v3 Copyright: 2021, Kovid Goyal + + +import ast + +from calibre.srv.tests.base 
import SimpleTest + + +class TestTransform(SimpleTest): + + def test_number_parsing(self): + from calibre_extensions.fast_css_transform import parse_css_number + for x in '.314 -.314 0.314 0 2 +2 -1 1e2 -3.14E+2 2e-2'.split(): + self.ae(parse_css_number(x), ast.literal_eval(x)) + self.ae(parse_css_number('2em'), 2) + self.ae(parse_css_number('.3em'), 0.3) + self.ae(parse_css_number('3x3'), 3) diff --git a/src/perfect-hashing/frozen/CMakeLists.txt b/src/perfect-hashing/frozen/CMakeLists.txt new file mode 100644 index 0000000000..185378d5c6 --- /dev/null +++ b/src/perfect-hashing/frozen/CMakeLists.txt @@ -0,0 +1,12 @@ +target_sources(frozen-headers INTERFACE + "${prefix}/frozen/algorithm.h" + "${prefix}/frozen/map.h" + "${prefix}/frozen/random.h" + "${prefix}/frozen/set.h" + "${prefix}/frozen/string.h" + "${prefix}/frozen/unordered_map.h" + "${prefix}/frozen/unordered_set.h" + "${prefix}/frozen/bits/algorithms.h" + "${prefix}/frozen/bits/basic_types.h" + "${prefix}/frozen/bits/elsa.h" + "${prefix}/frozen/bits/pmh.h") diff --git a/src/perfect-hashing/frozen/algorithm.h b/src/perfect-hashing/frozen/algorithm.h new file mode 100644 index 0000000000..a543eb3190 --- /dev/null +++ b/src/perfect-hashing/frozen/algorithm.h @@ -0,0 +1,197 @@ +/* + * Frozen + * Copyright 2016 QuarksLab + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#ifndef FROZEN_LETITGO_ALGORITHM_H +#define FROZEN_LETITGO_ALGORITHM_H + +#include "frozen/bits/basic_types.h" +#include "frozen/bits/version.h" +#include "frozen/string.h" + +namespace frozen { + +// 'search' implementation if C++17 is not available +// https://en.cppreference.com/w/cpp/algorithm/search +template +ForwardIterator search(ForwardIterator first, ForwardIterator last, const Searcher & searcher) +{ + return searcher(first, last).first; +} + +// text book implementation from +// https://en.wikipedia.org/wiki/Knuth%E2%80%93Morris%E2%80%93Pratt_algorithm + +template class knuth_morris_pratt_searcher { + bits::carray step_; + bits::carray needle_; + + static constexpr bits::carray + build_kmp_cache(char const (&needle)[size + 1]) { + std::ptrdiff_t cnd = 0; + bits::carray cache; + + cache.fill(-1); + for (std::size_t pos = 1; pos < size; ++pos) { + if (needle[pos] == needle[cnd]) { + cache[pos] = cache[cnd]; + cnd += 1; + } else { + cache[pos] = cnd; + cnd = cache[cnd]; + while (cnd >= 0 && needle[pos] != needle[cnd]) + cnd = cache[cnd]; + cnd += 1; + } + } + return cache; + } + +public: + constexpr knuth_morris_pratt_searcher(char const (&needle)[size + 1]) + : step_{build_kmp_cache(needle)}, needle_(needle) {} + + template + constexpr std::pair operator()(ForwardIterator first, ForwardIterator last) const { + std::size_t i = 0; + ForwardIterator iter = first; + while (iter != last) { + if (needle_[i] == *iter) { + if (i == (size - 1)) + return { iter - i, iter - i + size }; + ++i; + ++iter; + } else { + if (step_[i] > -1) { + i = 
step_[i]; + } else { + ++iter; + i = 0; + } + } + } + return { last, last }; + } +}; + +template +constexpr knuth_morris_pratt_searcher make_knuth_morris_pratt_searcher(char const (&needle)[N]) { + return {needle}; +} + +// text book implementation from +// https://en.wikipedia.org/wiki/Boyer%E2%80%93Moore%E2%80%93Horspool_algorithm + +template class boyer_moore_searcher { + using skip_table_type = bits::carray; + using suffix_table_type = bits::carray; + + skip_table_type skip_table_; + suffix_table_type suffix_table_; + bits::carray needle_; + + constexpr auto build_skip_table(char const (&needle)[size + 1]) { + skip_table_type skip_table; + + skip_table.fill(size); + for (std::size_t i = 0; i < size - 1; ++i) + skip_table[needle[i]] -= i + 1; + return skip_table; + } + + constexpr bool is_prefix(char const (&needle)[size + 1], std::size_t pos) { + std::size_t suffixlen = size - pos; + + for (std::size_t i = 0; i < suffixlen; i++) { + if (needle[i] != needle[pos + i]) + return false; + } + return true; + } + + constexpr std::size_t suffix_length(char const (&needle)[size + 1], + std::size_t pos) { + // increment suffix length slen to the first mismatch or beginning + // of the word + for (std::size_t slen = 0; slen < pos ; slen++) + if (needle[pos - slen] != needle[size - 1 - slen]) + return slen; + + return pos; + } + + constexpr auto build_suffix_table(char const (&needle)[size + 1]) { + suffix_table_type suffix; + std::ptrdiff_t last_prefix_index = size - 1; + + // first loop + for (std::ptrdiff_t p = size - 1; p >= 0; p--) { + if (is_prefix(needle, p + 1)) + last_prefix_index = p + 1; + + suffix[p] = last_prefix_index + (size - 1 - p); + } + + // second loop + for (std::size_t p = 0; p < size - 1; p++) { + auto slen = suffix_length(needle, p); + if (needle[p - slen] != needle[size - 1 - slen]) + suffix[size - 1 - slen] = size - 1 - p + slen; + + } + return suffix; + } + +public: + constexpr boyer_moore_searcher(char const (&needle)[size + 1]) + : 
skip_table_{build_skip_table(needle)}, + suffix_table_{build_suffix_table(needle)}, + needle_(needle) {} + + template + constexpr std::pair operator()(ForwardIterator first, ForwardIterator last) const { + if (size == 0) + return { first, first + size }; + + ForwardIterator iter = first + size - 1; + while (iter < last) { + std::ptrdiff_t j = size - 1; + while (j > 0 && (*iter == needle_[j])) { + --iter; + --j; + } + if (*iter == needle_[0]) + return { iter, iter + size}; + + iter += std::max(skip_table_[*iter], suffix_table_[j]); + } + return { last, last + size}; + } +}; + +template +constexpr boyer_moore_searcher make_boyer_moore_searcher(char const (&needle)[N]) { + return {needle}; +} + +} // namespace frozen + +#endif diff --git a/src/perfect-hashing/frozen/bits/algorithms.h b/src/perfect-hashing/frozen/bits/algorithms.h new file mode 100644 index 0000000000..8d1ffbc524 --- /dev/null +++ b/src/perfect-hashing/frozen/bits/algorithms.h @@ -0,0 +1,229 @@ +/* + * Frozen + * Copyright 2016 QuarksLab + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#ifndef FROZEN_LETITGO_BITS_ALGORITHMS_H +#define FROZEN_LETITGO_BITS_ALGORITHMS_H + +#include "frozen/bits/basic_types.h" + +#include +#include + +namespace frozen { + +namespace bits { + +auto constexpr next_highest_power_of_two(std::size_t v) { + // https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 + constexpr auto trip_count = std::numeric_limits::digits; + v--; + for(std::size_t i = 1; i < trip_count; i <<= 1) + v |= v >> i; + v++; + return v; +} + +template +auto constexpr log(T v) { + std::size_t n = 0; + while (v > 1) { + n += 1; + v >>= 1; + } + return n; +} + +constexpr std::size_t bit_weight(std::size_t n) { + return (n <= 8*sizeof(unsigned int)) + + (n <= 8*sizeof(unsigned long)) + + (n <= 8*sizeof(unsigned long long)) + + (n <= 128); +} + +unsigned int select_uint_least(std::integral_constant); +unsigned long select_uint_least(std::integral_constant); +unsigned long long select_uint_least(std::integral_constant); +template +unsigned long long select_uint_least(std::integral_constant) { + static_assert(N < 2, "unsupported type size"); + return {}; +} + + +template +using select_uint_least_t = decltype(select_uint_least(std::integral_constant())); + +template +constexpr auto min_element(Iter begin, const Iter end, + Compare const &compare) { + auto result = begin; + while (begin != end) { + if (compare(*begin, *result)) { + result = begin; + } + ++begin; + } + return result; +} + +template +constexpr void cswap(T &a, T &b) { + auto tmp = a; + a = b; + b = tmp; +} + +template +constexpr void cswap(std::pair & a, std::pair & b) { + cswap(a.first, b.first); + cswap(a.second, b.second); +} + +template +constexpr void cswap(std::tuple &a, std::tuple &b, std::index_sequence) { + using swallow = int[]; + (void) swallow{(cswap(std::get(a), std::get(b)), 0)...}; +} + +template +constexpr void cswap(std::tuple &a, std::tuple &b) { + cswap(a, b, std::make_index_sequence()); +} + +template +constexpr Iterator partition(Iterator left, 
Iterator right, Compare const &compare) { + auto pivot = left + (right - left) / 2; + auto value = *pivot; + cswap(*right, *pivot); + for (auto it = left; 0 < right - it; ++it) { + if (compare(*it, value)) { + cswap(*it, *left); + left++; + } + } + cswap(*right, *left); + return left; +} + +template +constexpr void quicksort(Iterator left, Iterator right, Compare const &compare) { + while (0 < right - left) { + auto new_pivot = bits::partition(left, right, compare); + quicksort(left, new_pivot, compare); + left = new_pivot + 1; + } +} + +template +constexpr bits::carray quicksort(bits::carray const &array, + Compare const &compare) { + bits::carray res = array; + quicksort(res.begin(), res.end() - 1, compare); + return res; +} + +template struct LowerBound { + T const &value_; + Compare const &compare_; + constexpr LowerBound(T const &value, Compare const &compare) + : value_(value), compare_(compare) {} + + template + inline constexpr ForwardIt doit_fast(ForwardIt first, + std::integral_constant) { + return first; + } + + template + inline constexpr ForwardIt doit_fast(ForwardIt first, + std::integral_constant) { + auto constexpr step = N / 2; + static_assert(N/2 == N - N / 2 - 1, "power of two minus 1"); + auto it = first + step; + auto next_it = compare_(*it, value_) ? 
it + 1 : first; + return doit_fast(next_it, std::integral_constant{}); + } + + template + inline constexpr ForwardIt doitfirst(ForwardIt first, std::integral_constant, std::integral_constant) { + return doit_fast(first, std::integral_constant{}); + } + + template + inline constexpr ForwardIt doitfirst(ForwardIt first, std::integral_constant, std::integral_constant) { + auto constexpr next_power = next_highest_power_of_two(N); + auto constexpr next_start = next_power / 2 - 1; + auto it = first + next_start; + if (compare_(*it, value_)) { + auto constexpr next = N - next_start - 1; + return doitfirst(it + 1, std::integral_constant{}, std::integral_constant{}); + } + else + return doit_fast(first, std::integral_constant{}); + } + + template + inline constexpr ForwardIt doitfirst(ForwardIt first, std::integral_constant, std::integral_constant) { + return doit_fast(first, std::integral_constant{}); + } +}; + +template +constexpr ForwardIt lower_bound(ForwardIt first, const T &value, Compare const &compare) { + return LowerBound{value, compare}.doitfirst(first, std::integral_constant{}, std::integral_constant{}); +} + +template +constexpr bool binary_search(ForwardIt first, const T &value, + Compare const &compare) { + ForwardIt where = lower_bound(first, value, compare); + return (!(where == first + N) && !(compare(value, *where))); +} + + +template +constexpr bool equal(InputIt1 first1, InputIt1 last1, InputIt2 first2) +{ + for (; first1 != last1; ++first1, ++first2) { + if (!(*first1 == *first2)) { + return false; + } + } + return true; +} + +template +constexpr bool lexicographical_compare(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2) +{ + for (; (first1 != last1) && (first2 != last2); ++first1, ++first2) { + if (*first1 < *first2) + return true; + if (*first2 < *first1) + return false; + } + return (first1 == last1) && (first2 != last2); +} + +} // namespace bits +} // namespace frozen + +#endif diff --git 
a/src/perfect-hashing/frozen/bits/basic_types.h b/src/perfect-hashing/frozen/bits/basic_types.h new file mode 100644 index 0000000000..b54a60db56 --- /dev/null +++ b/src/perfect-hashing/frozen/bits/basic_types.h @@ -0,0 +1,207 @@ +/* + * Frozen + * Copyright 2016 QuarksLab + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#ifndef FROZEN_LETITGO_BASIC_TYPES_H +#define FROZEN_LETITGO_BASIC_TYPES_H + +#include "frozen/bits/exceptions.h" + +#include +#include +#include +#include + +namespace frozen { + +namespace bits { + +// used as a fake argument for frozen::make_set and frozen::make_map in the case of N=0 +struct ignored_arg {}; + +template +class cvector { + T data [N] = {}; // zero-initialization for scalar type T, default-initialized otherwise + std::size_t dsize = 0; + +public: + // Container typdefs + using value_type = T; + using reference = value_type &; + using const_reference = const value_type &; + using pointer = value_type *; + using const_pointer = const value_type *; + using iterator = pointer; + using const_iterator = const_pointer; + using size_type = std::size_t; + using difference_type = std::ptrdiff_t; + + // Constructors + constexpr cvector(void) = default; + constexpr cvector(size_type count, const T& value) : dsize(count) { + for (std::size_t i = 0; i < N; ++i) + data[i] = value; + } + + // Iterators + constexpr iterator begin() noexcept { return data; } + constexpr iterator end() noexcept { return data + dsize; } + + // Capacity + constexpr size_type size() const { return dsize; } + + // Element access + constexpr reference operator[](std::size_t index) { return data[index]; } + constexpr const_reference operator[](std::size_t index) const { return data[index]; } + + constexpr reference back() { return data[dsize - 1]; } + constexpr const_reference back() const { return data[dsize - 1]; } + + // Modifiers + constexpr void push_back(const T & a) { data[dsize++] = a; } + constexpr void push_back(T && a) { data[dsize++] = std::move(a); } + constexpr void pop_back() { --dsize; } + + constexpr void clear() { dsize = 0; } +}; + +template +class carray { + T data_ [N] = {}; // zero-initialization for scalar type T, default-initialized otherwise + + template + constexpr carray(T const (&init)[M], std::index_sequence) + : data_{init[I]...} {} + template + 
constexpr carray(Iter iter, std::index_sequence) + : data_{((void)I, *iter++)...} {} + +public: + // Container typdefs + using value_type = T; + using reference = value_type &; + using const_reference = const value_type &; + using pointer = value_type *; + using const_pointer = const value_type *; + using iterator = pointer; + using const_iterator = const_pointer; + using reverse_iterator = std::reverse_iterator; + using const_reverse_iterator = std::reverse_iterator; + using size_type = std::size_t; + using difference_type = std::ptrdiff_t; + + // Constructors + constexpr carray(void) = default; + template + constexpr carray(T const (&init)[M]) + : carray(init, std::make_index_sequence()) + { + static_assert(M >= N, "Cannot initialize a carray with an smaller array"); + } + template + constexpr carray(std::array const &init) + : carray(&init[0], std::make_index_sequence()) + { + static_assert(M >= N, "Cannot initialize a carray with an smaller array"); + } + constexpr carray(std::initializer_list init) + : carray(init.begin(), std::make_index_sequence()) + { + // clang & gcc doesn't recognize init.size() as a constexpr + // static_assert(init.size() >= N, "Cannot initialize a carray with an smaller initializer list"); + } + + // Iterators + constexpr iterator begin() noexcept { return data_; } + constexpr const_iterator begin() const noexcept { return data_; } + constexpr const_iterator cbegin() const noexcept { return data_; } + constexpr iterator end() noexcept { return data_ + N; } + constexpr const_iterator end() const noexcept { return data_ + N; } + constexpr const_iterator cend() const noexcept { return data_ + N; } + + constexpr reverse_iterator rbegin() noexcept { return reverse_iterator(end()); } + constexpr const_reverse_iterator rbegin() const noexcept { return const_reverse_iterator(end()); } + constexpr const_reverse_iterator crbegin() const noexcept { return const_reverse_iterator(end()); } + constexpr reverse_iterator rend() noexcept { return 
reverse_iterator(begin()); } + constexpr const_reverse_iterator rend() const noexcept { return const_reverse_iterator(begin()); } + constexpr const_reverse_iterator crend() const noexcept { return const_reverse_iterator(begin()); } + + // Capacity + constexpr size_type size() const { return N; } + constexpr size_type max_size() const { return N; } + + // Element access + constexpr reference operator[](std::size_t index) { return data_[index]; } + constexpr const_reference operator[](std::size_t index) const { return data_[index]; } + + constexpr reference at(std::size_t index) { + if (index >= N) /* valid indices are 0 .. N-1, so index == N must be rejected as well */ + FROZEN_THROW_OR_ABORT(std::out_of_range("Index (" + std::to_string(index) + ") out of bound (" + std::to_string(N) + ')')); + return data_[index]; + } + constexpr const_reference at(std::size_t index) const { + if (index >= N) /* valid indices are 0 .. N-1, so index == N must be rejected as well */ + FROZEN_THROW_OR_ABORT(std::out_of_range("Index (" + std::to_string(index) + ") out of bound (" + std::to_string(N) + ')')); + return data_[index]; + } + + constexpr reference front() { return data_[0]; } + constexpr const_reference front() const { return data_[0]; } + + constexpr reference back() { return data_[N - 1]; } + constexpr const_reference back() const { return data_[N - 1]; } + + constexpr value_type* data() noexcept { return data_; } + constexpr const value_type* data() const noexcept { return data_; } + + // Modifiers + constexpr void fill(const value_type& val) { + for (std::size_t i = 0; i < N; ++i) + data_[i] = val; + } +}; +template +class carray { + +public: + // Container typdefs + using value_type = T; + using reference = value_type &; + using const_reference = const value_type &; + using pointer = value_type *; + using const_pointer = const value_type *; + using iterator = pointer; + using const_iterator = const_pointer; + using reverse_iterator = std::reverse_iterator; + using const_reverse_iterator = std::reverse_iterator; + using size_type = std::size_t; + using difference_type = std::ptrdiff_t; + + // Constructors + 
constexpr carray(void) = default; + +}; + +} // namespace bits + +} // namespace frozen + +#endif diff --git a/src/perfect-hashing/frozen/bits/constexpr_assert.h b/src/perfect-hashing/frozen/bits/constexpr_assert.h new file mode 100644 index 0000000000..912210dc22 --- /dev/null +++ b/src/perfect-hashing/frozen/bits/constexpr_assert.h @@ -0,0 +1,40 @@ +/* + * Frozen + * Copyright 2016 QuarksLab + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#ifndef FROZEN_LETITGO_CONSTEXPR_ASSERT_H +#define FROZEN_LETITGO_CONSTEXPR_ASSERT_H + +#include + +#ifdef _MSC_VER + +// FIXME: find a way to implement that correctly for msvc +#define constexpr_assert(cond, msg) + +#else + +#define constexpr_assert(cond, msg)\ + assert(cond && msg); +#endif + +#endif + diff --git a/src/perfect-hashing/frozen/bits/defines.h b/src/perfect-hashing/frozen/bits/defines.h new file mode 100644 index 0000000000..0a1663da52 --- /dev/null +++ b/src/perfect-hashing/frozen/bits/defines.h @@ -0,0 +1,58 @@ +/* + * Frozen + * Copyright 2016 QuarksLab + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#ifndef FROZEN_LETITGO_DEFINES_H +#define FROZEN_LETITGO_DEFINES_H + +#if defined(_MSVC_LANG) && !(defined(__EDG__) && defined(__clang__)) // TRANSITION, VSO#273681 + #define FROZEN_LETITGO_IS_MSVC +#endif + +// Code taken from https://stackoverflow.com/questions/43639122/which-values-can-msvc-lang-have +#if defined(FROZEN_LETITGO_IS_MSVC) + #if _MSVC_LANG > 201402 + #define FROZEN_LETITGO_HAS_CXX17 1 + #else /* _MSVC_LANG > 201402 */ + #define FROZEN_LETITGO_HAS_CXX17 0 + #endif /* _MSVC_LANG > 201402 */ +#else /* _MSVC_LANG etc. */ + #if __cplusplus > 201402 + #define FROZEN_LETITGO_HAS_CXX17 1 + #else /* __cplusplus > 201402 */ + #define FROZEN_LETITGO_HAS_CXX17 0 + #endif /* __cplusplus > 201402 */ +#endif /* _MSVC_LANG etc. 
*/ +// End if taken code + +#if FROZEN_LETITGO_HAS_CXX17 == 1 && defined(FROZEN_LETITGO_IS_MSVC) + #define FROZEN_LETITGO_HAS_STRING_VIEW // We assume Visual Studio always has string_view in C++17 +#else + #if FROZEN_LETITGO_HAS_CXX17 == 1 && __has_include() + #define FROZEN_LETITGO_HAS_STRING_VIEW + #endif +#endif + +#ifdef __cpp_char8_t + #define FROZEN_LETITGO_HAS_CHAR8T +#endif + +#endif // FROZEN_LETITGO_DEFINES_H diff --git a/src/perfect-hashing/frozen/bits/elsa.h b/src/perfect-hashing/frozen/bits/elsa.h new file mode 100644 index 0000000000..d7388be920 --- /dev/null +++ b/src/perfect-hashing/frozen/bits/elsa.h @@ -0,0 +1,50 @@ +/* + * Frozen + * Copyright 2016 QuarksLab + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#ifndef FROZEN_LETITGO_ELSA_H +#define FROZEN_LETITGO_ELSA_H + +#include + +namespace frozen { + +template struct elsa { + static_assert(std::is_integral::value || std::is_enum::value, + "only supports integral types, specialize for other types"); + + constexpr std::size_t operator()(T const &value, std::size_t seed) const { + std::size_t key = seed ^ static_cast(value); + key = (~key) + (key << 21); // key = (key << 21) - key - 1; + key = key ^ (key >> 24); + key = (key + (key << 3)) + (key << 8); // key * 265 + key = key ^ (key >> 14); + key = (key + (key << 2)) + (key << 4); // key * 21 + key = key ^ (key >> 28); + key = key + (key << 31); + return key; + } +}; + +template using anna = elsa; +} // namespace frozen + +#endif diff --git a/src/perfect-hashing/frozen/bits/exceptions.h b/src/perfect-hashing/frozen/bits/exceptions.h new file mode 100644 index 0000000000..b43e3e6b90 --- /dev/null +++ b/src/perfect-hashing/frozen/bits/exceptions.h @@ -0,0 +1,39 @@ +/* + * Frozen + * Copyright 2016 QuarksLab + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#ifndef FROZEN_LETITGO_EXCEPTIONS_H +#define FROZEN_LETITGO_EXCEPTIONS_H + +#if defined(FROZEN_NO_EXCEPTIONS) || (defined(_MSC_VER) && !defined(_CPPUNWIND)) || (!defined(_MSC_VER) && !defined(__cpp_exceptions)) + +#include +#define FROZEN_THROW_OR_ABORT(_) std::abort() + +#else + +#include +#define FROZEN_THROW_OR_ABORT(err) throw err + + +#endif + +#endif diff --git a/src/perfect-hashing/frozen/bits/pmh.h b/src/perfect-hashing/frozen/bits/pmh.h new file mode 100644 index 0000000000..76e7ebe0b2 --- /dev/null +++ b/src/perfect-hashing/frozen/bits/pmh.h @@ -0,0 +1,240 @@ +/* + * Frozen + * Copyright 2016 QuarksLab + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +// inspired from http://stevehanov.ca/blog/index.php?id=119 +#ifndef FROZEN_LETITGO_PMH_H +#define FROZEN_LETITGO_PMH_H + +#include "frozen/bits/algorithms.h" +#include "frozen/bits/basic_types.h" + +#include +#include + +namespace frozen { + +namespace bits { + +// Function object for sorting buckets in decreasing order of size +struct bucket_size_compare { + template + bool constexpr operator()(B const &b0, + B const &b1) const { + return b0.size() > b1.size(); + } +}; + +// Step One in pmh routine is to take all items and hash them into buckets, +// with some collisions. Then process those buckets further to build a perfect +// hash function. +// pmh_buckets represents the initial placement into buckets. + +template +struct pmh_buckets { + // Step 0: Bucket max is 2 * sqrt M + // TODO: Come up with justification for this, should it not be O(log M)? + static constexpr auto bucket_max = 2 * (1u << (log(M) / 2)); + + using bucket_t = cvector; + carray buckets; + uint64_t seed; + + // Represents a reference to a bucket. This is used because the buckets + // have to be sorted, but buckets are big, making it slower than sorting refs + struct bucket_ref { + unsigned hash; + const bucket_t * ptr; + + // Forward some interface of bucket + using value_type = typename bucket_t::value_type; + using const_iterator = typename bucket_t::const_iterator; + + constexpr auto size() const { return ptr->size(); } + constexpr const auto & operator[](std::size_t idx) const { return (*ptr)[idx]; } + constexpr auto begin() const { return ptr->begin(); } + constexpr auto end() const { return ptr->end(); } + }; + + // Make a bucket_ref for each bucket + template + carray constexpr make_bucket_refs(std::index_sequence) const { + return {{ bucket_ref{Is, &buckets[Is]}... 
}}; + } + + // Makes a bucket_ref for each bucket and sorts them by size + carray constexpr get_sorted_buckets() const { + carray result{this->make_bucket_refs(std::make_index_sequence())}; + bits::quicksort(result.begin(), result.end() - 1, bucket_size_compare{}); + return result; + } +}; + +template +pmh_buckets constexpr make_pmh_buckets(const carray & items, + Hash const & hash, + Key const & key, + PRG & prg) { + using result_t = pmh_buckets; + result_t result{}; + bool rejected = false; + // Continue until all items are placed without exceeding bucket_max + while (1) { + for (auto & b : result.buckets) { + b.clear(); + } + result.seed = prg(); + rejected = false; + for (std::size_t i = 0; i < N; ++i) { + auto & bucket = result.buckets[hash(key(items[i]), static_cast(result.seed)) % M]; + if (bucket.size() >= result_t::bucket_max) { + rejected = true; + break; + } + bucket.push_back(i); + } + if (!rejected) { return result; } + } +} + +// Check if an item appears in a cvector +template +constexpr bool all_different_from(cvector & data, T & a) { + for (std::size_t i = 0; i < data.size(); ++i) + if (data[i] == a) + return false; + + return true; +} + +// Represents either an index to a data item array, or a seed to be used with +// a hasher. Seed must have high bit of 1, value has high bit of zero. +struct seed_or_index { + using value_type = uint64_t; + +private: + static constexpr value_type MINUS_ONE = std::numeric_limits::max(); + static constexpr value_type HIGH_BIT = ~(MINUS_ONE >> 1); + + value_type value_ = 0; + +public: + constexpr value_type value() const { return value_; } + constexpr bool is_seed() const { return value_ & HIGH_BIT; } + + constexpr seed_or_index(bool is_seed, value_type value) + : value_(is_seed ? 
(value | HIGH_BIT) : (value & ~HIGH_BIT)) {} + + constexpr seed_or_index() = default; + constexpr seed_or_index(const seed_or_index &) = default; + constexpr seed_or_index & operator =(const seed_or_index &) = default; +}; + +// Represents the perfect hash function created by pmh algorithm +template +struct pmh_tables { + uint64_t first_seed_; + carray first_table_; + carray second_table_; + Hasher hash_; + + // Looks up a given key, to find its expected index in carray + // Always returns a valid index, must use KeyEqual test after to confirm. + template + constexpr std::size_t lookup(const KeyType & key) const { + auto const d = first_table_[hash_(key, static_cast(first_seed_)) % M]; + if (!d.is_seed()) { return static_cast(d.value()); } // this is narrowing uint64 -> size_t but should be fine + else { return second_table_[hash_(key, static_cast(d.value())) % M]; } + } +}; + +// Make pmh tables for given items, hash function, prg, etc. +template +pmh_tables constexpr make_pmh_tables(const carray & + items, + Hash const &hash, + Key const &key, + PRG prg) { + // Step 1: Place all of the keys into buckets + auto step_one = make_pmh_buckets(items, hash, key, prg); + + // Step 2: Sort the buckets to process the ones with the most items first. + auto buckets = step_one.get_sorted_buckets(); + + // G becomes the first hash table in the resulting pmh function + carray G; // Default constructed to "index 0" + + // H becomes the second hash table in the resulting pmh function + constexpr std::size_t UNUSED = std::numeric_limits::max(); + carray H; + H.fill(UNUSED); + + // Step 3: Map the items in buckets into hash tables. 
+ for (const auto & bucket : buckets) { + auto const bsize = bucket.size(); + + if (bsize == 1) { + // Store index to the (single) item in G + // assert(bucket.hash == hash(key(items[bucket[0]]), step_one.seed) % M); + G[bucket.hash] = {false, static_cast(bucket[0])}; + } else if (bsize > 1) { + + // Repeatedly try different H of d until we find a hash function + // that places all items in the bucket into free slots + seed_or_index d{true, prg()}; + cvector bucket_slots; + + while (bucket_slots.size() < bsize) { + auto slot = hash(key(items[bucket[bucket_slots.size()]]), static_cast(d.value())) % M; + + if (H[slot] != UNUSED || !all_different_from(bucket_slots, slot)) { + bucket_slots.clear(); + d = {true, prg()}; + continue; + } + + bucket_slots.push_back(slot); + } + + // Put successful seed in G, and put indices to items in their slots + // assert(bucket.hash == hash(key(items[bucket[0]]), step_one.seed) % M); + G[bucket.hash] = d; + for (std::size_t i = 0; i < bsize; ++i) + H[bucket_slots[i]] = bucket[i]; + } + } + + // Any unused entries in the H table have to get changed to zero. + // This is because hashing should not fail or return an out-of-bounds entry. + // A lookup fails after we apply user-supplied KeyEqual to the query and the + // key found by hashing. Sending such queries to zero cannot hurt. + for (std::size_t i = 0; i < M; ++i) + if (H[i] == UNUSED) + H[i] = 0; + + return {step_one.seed, G, H, hash}; +} + +} // namespace bits + +} // namespace frozen + +#endif diff --git a/src/perfect-hashing/frozen/bits/version.h b/src/perfect-hashing/frozen/bits/version.h new file mode 100644 index 0000000000..51804d2ca5 --- /dev/null +++ b/src/perfect-hashing/frozen/bits/version.h @@ -0,0 +1,30 @@ +/* + * Frozen + * Copyright 2016 QuarksLab + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#ifndef FROZEN_LETITGO_VERSION_H +#define FROZEN_LETITGO_VERSION_H + +#define FROZEN_MAJOR_VERSION 1 +#define FROZEN_MINOR_VERSION 0 +#define FROZEN_PATCH_VERSION 1 + +#endif diff --git a/src/perfect-hashing/frozen/map.h b/src/perfect-hashing/frozen/map.h new file mode 100644 index 0000000000..9dc206e8f9 --- /dev/null +++ b/src/perfect-hashing/frozen/map.h @@ -0,0 +1,328 @@ +/* + * Frozen + * Copyright 2016 QuarksLab + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#ifndef FROZEN_LETITGO_MAP_H +#define FROZEN_LETITGO_MAP_H + +#include "frozen/bits/algorithms.h" +#include "frozen/bits/basic_types.h" +#include "frozen/bits/constexpr_assert.h" +#include "frozen/bits/exceptions.h" +#include "frozen/bits/version.h" + +#include + +namespace frozen { + +namespace impl { + +template class CompareKey { + + Comparator const comparator_; + +public: + constexpr CompareKey(Comparator const &comparator) + : comparator_(comparator) {} + + template + constexpr int operator()(std::pair const &self, + std::pair const &other) const { + return comparator_(std::get<0>(self), std::get<0>(other)); + } + + template + constexpr int operator()(Key const &self_key, + std::pair const &other) const { + return comparator_(self_key, std::get<0>(other)); + } + + template + constexpr int operator()(std::pair const &self, + Key const &other_key) const { + return comparator_(std::get<0>(self), other_key); + } + + template + constexpr int operator()(Key const &self_key, Key const &other_key) const { + return comparator_(self_key, other_key); + } +}; + +} // namespace impl + +template > +class map { + using container_type = bits::carray, N>; + impl::CompareKey less_than_; + container_type items_; + +public: + using key_type = Key; + using mapped_type = Value; + using value_type = typename container_type::value_type; + using size_type = typename container_type::size_type; + using difference_type = typename container_type::difference_type; + using key_compare = decltype(less_than_); + using reference = typename container_type::reference; + using const_reference = typename container_type::const_reference; + using pointer = typename container_type::pointer; + using const_pointer = typename container_type::const_pointer; + using iterator = typename container_type::iterator; + using const_iterator = typename container_type::const_iterator; + using reverse_iterator = typename container_type::reverse_iterator; + using const_reverse_iterator = + typename 
container_type::const_reverse_iterator; + +public: + /* constructors */ + constexpr map(container_type items, Compare const &compare) + : less_than_{compare} + , items_{bits::quicksort(items, less_than_)} {} + + explicit constexpr map(container_type items) + : map{items, Compare{}} {} + + constexpr map(std::initializer_list items, Compare const &compare) + : map{container_type {items}, compare} { + constexpr_assert(items.size() == N, "Inconsistent initializer_list size and type size argument"); + } + + constexpr map(std::initializer_list items) + : map{items, Compare{}} {} + + /* element access */ + constexpr Value const& at(Key const &key) const { + return at_impl(*this, key); + } + constexpr Value& at(Key const &key) { + return at_impl(*this, key); + } + + /* iterators */ + constexpr iterator begin() { return items_.begin(); } + constexpr const_iterator begin() const { return items_.begin(); } + constexpr const_iterator cbegin() const { return items_.cbegin(); } + constexpr iterator end() { return items_.end(); } + constexpr const_iterator end() const { return items_.end(); } + constexpr const_iterator cend() const { return items_.cend(); } + + constexpr reverse_iterator rbegin() { return items_.rbegin(); } + constexpr const_reverse_iterator rbegin() const { return items_.rbegin(); } + constexpr const_reverse_iterator crbegin() const { return items_.crbegin(); } + constexpr reverse_iterator rend() { return items_.rend(); } + constexpr const_reverse_iterator rend() const { return items_.rend(); } + constexpr const_reverse_iterator crend() const { return items_.crend(); } + + /* capacity */ + constexpr bool empty() const { return !N; } + constexpr size_type size() const { return N; } + constexpr size_type max_size() const { return N; } + + /* lookup */ + + constexpr std::size_t count(Key const &key) const { + return bits::binary_search(items_.begin(), key, less_than_); + } + + constexpr const_iterator find(Key const &key) const { + return find_impl(*this, key); + } 
+ constexpr iterator find(Key const &key) { + return find_impl(*this, key); + } + + constexpr std::pair + equal_range(Key const &key) const { + return equal_range_impl(*this, key); + } + constexpr std::pair equal_range(Key const &key) { + return equal_range_impl(*this, key); + } + + constexpr const_iterator lower_bound(Key const &key) const { + return lower_bound_impl(*this, key); + } + constexpr iterator lower_bound(Key const &key) { + return lower_bound_impl(*this, key); + } + + constexpr const_iterator upper_bound(Key const &key) const { + return upper_bound_impl(*this, key); + } + constexpr iterator upper_bound(Key const &key) { + return upper_bound_impl(*this, key); + } + + /* observers */ + constexpr key_compare key_comp() const { return less_than_; } + constexpr key_compare value_comp() const { return less_than_; } + + private: + template + static inline constexpr auto& at_impl(This&& self, Key const &key) { + auto where = self.lower_bound(key); + if (where != self.end()) + return where->second; + else + FROZEN_THROW_OR_ABORT(std::out_of_range("unknown key")); + } + + template + static inline constexpr auto find_impl(This&& self, Key const &key) { + auto where = self.lower_bound(key); + if ((where != self.end()) && !self.less_than_(key, *where)) + return where; + else + return self.end(); + } + + template + static inline constexpr auto equal_range_impl(This&& self, Key const &key) { + auto lower = self.lower_bound(key); + using lower_t = decltype(lower); + if (lower == self.end()) + return std::pair{lower, lower}; + else + return std::pair{lower, lower + 1}; + } + + template + static inline constexpr auto lower_bound_impl(This&& self, Key const &key) -> decltype(self.end()) { + auto where = bits::lower_bound(self.items_.begin(), key, self.less_than_); + if ((where != self.end()) && !self.less_than_(key, *where)) + return where; + else + return self.end(); + } + + template + static inline constexpr auto upper_bound_impl(This&& self, Key const &key) -> 
decltype(self.end()) { + auto where = bits::lower_bound(self.items_.begin(), key, self.less_than_); + if ((where != self.end()) && !self.less_than_(key, *where)) + return where + 1; + else + return self.end(); + } +}; + +template +class map { + using container_type = bits::carray, 0>; + impl::CompareKey less_than_; + +public: + using key_type = Key; + using mapped_type = Value; + using value_type = typename container_type::value_type; + using size_type = typename container_type::size_type; + using difference_type = typename container_type::difference_type; + using key_compare = decltype(less_than_); + using reference = typename container_type::reference; + using const_reference = typename container_type::const_reference; + using pointer = typename container_type::pointer; + using const_pointer = typename container_type::const_pointer; + using iterator = pointer; + using const_iterator = const_pointer; + using reverse_iterator = pointer; + using const_reverse_iterator = const_pointer; + +public: + /* constructors */ + constexpr map(const map &other) = default; + constexpr map(std::initializer_list, Compare const &compare) + : less_than_{compare} {} + constexpr map(std::initializer_list items) + : map{items, Compare{}} {} + + /* element access */ + constexpr mapped_type at(Key const &) const { + FROZEN_THROW_OR_ABORT(std::out_of_range("invalid key")); + } + constexpr mapped_type at(Key const &) { + FROZEN_THROW_OR_ABORT(std::out_of_range("invalid key")); + } + + /* iterators */ + constexpr iterator begin() { return nullptr; } + constexpr const_iterator begin() const { return nullptr; } + constexpr const_iterator cbegin() const { return nullptr; } + constexpr iterator end() { return nullptr; } + constexpr const_iterator end() const { return nullptr; } + constexpr const_iterator cend() const { return nullptr; } + + constexpr reverse_iterator rbegin() { return nullptr; } + constexpr const_reverse_iterator rbegin() const { return nullptr; } + constexpr 
const_reverse_iterator crbegin() const { return nullptr; } + constexpr reverse_iterator rend() { return nullptr; } + constexpr const_reverse_iterator rend() const { return nullptr; } + constexpr const_reverse_iterator crend() const { return nullptr; } + + /* capacity */ + constexpr bool empty() const { return true; } + constexpr size_type size() const { return 0; } + constexpr size_type max_size() const { return 0; } + + /* lookup */ + + constexpr std::size_t count(Key const &) const { return 0; } + + constexpr const_iterator find(Key const &) const { return end(); } + constexpr iterator find(Key const &) { return end(); } + + constexpr std::pair + equal_range(Key const &) const { + return {end(), end()}; + } + constexpr std::pair + equal_range(Key const &) { + return {end(), end()}; + } + + constexpr const_iterator lower_bound(Key const &) const { return end(); } + constexpr iterator lower_bound(Key const &) { return end(); } + + constexpr const_iterator upper_bound(Key const &) const { return end(); } + constexpr iterator upper_bound(Key const &) { return end(); } + + /* observers */ + constexpr key_compare key_comp() const { return less_than_; } + constexpr key_compare value_comp() const { return less_than_; } +}; + +template +constexpr auto make_map(bits::ignored_arg = {}/* for consistency with the initializer below for N = 0*/) { + return map{}; +} + +template +constexpr auto make_map(std::pair const (&items)[N]) { + return map{items}; +} + +template +constexpr auto make_map(std::array, N> const &items) { + return map{items}; +} + +} // namespace frozen + +#endif diff --git a/src/perfect-hashing/frozen/random.h b/src/perfect-hashing/frozen/random.h new file mode 100644 index 0000000000..d88cd910f2 --- /dev/null +++ b/src/perfect-hashing/frozen/random.h @@ -0,0 +1,90 @@ +/* + * Frozen + * Copyright 2016 QuarksLab + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#ifndef FROZEN_LETITGO_RANDOM_H +#define FROZEN_LETITGO_RANDOM_H + +#include "frozen/bits/algorithms.h" +#include "frozen/bits/version.h" + +#include +#include + +namespace frozen { +template +class linear_congruential_engine { + + static_assert(std::is_unsigned::value, + "UIntType must be an unsigned integral type"); + +public: + using result_type = UIntType; + static constexpr result_type multiplier = a; + static constexpr result_type increment = c; + static constexpr result_type modulus = m; + static constexpr result_type default_seed = 1u; + + linear_congruential_engine() = default; + constexpr linear_congruential_engine(result_type s) { seed(s); } + + void seed(result_type s = default_seed) { state_ = s; } + constexpr result_type operator()() { + using uint_least_t = bits::select_uint_least_t; + uint_least_t tmp = static_cast(multiplier) * state_ + increment; + + // the static cast below may end up doing a truncation + if(modulus != 0) + state_ = static_cast(tmp % modulus); + else + state_ = static_cast(tmp); + return state_; + } + constexpr void discard(unsigned long long n) { + while (n--) + operator()(); + } + static constexpr result_type min() { return increment == 0u ? 
1u : 0u; } + static constexpr result_type max() { return modulus - 1u; } + friend constexpr bool operator==(linear_congruential_engine const &self, + linear_congruential_engine const &other) { + return self.state_ == other.state_; + } + friend constexpr bool operator!=(linear_congruential_engine const &self, + linear_congruential_engine const &other) { + return !(self == other); + } + +private: + result_type state_ = default_seed; +}; + +using minstd_rand0 = + linear_congruential_engine; +using minstd_rand = + linear_congruential_engine; + +// This generator is used by default in unordered frozen containers +using default_prg_t = minstd_rand; + +} // namespace frozen + +#endif diff --git a/src/perfect-hashing/frozen/set.h b/src/perfect-hashing/frozen/set.h new file mode 100644 index 0000000000..55c45e82a1 --- /dev/null +++ b/src/perfect-hashing/frozen/set.h @@ -0,0 +1,225 @@ +/* + * Frozen + * Copyright 2016 QuarksLab + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+
+#ifndef FROZEN_SET_H
+#define FROZEN_SET_H
+
+#include "frozen/bits/algorithms.h"
+#include "frozen/bits/basic_types.h"
+#include "frozen/bits/constexpr_assert.h"
+#include "frozen/bits/version.h"
+
+#include <utility>
+
+namespace frozen {
+
+// Fixed-size ordered set holding exactly N keys.
+// The constructor sorts the keys with bits::quicksort under Compare; lookups
+// then go through bits::binary_search / bits::lower_bound on that array.
+// Keys are assumed to be unique: equal_range() never spans more than one
+// element.
+template <class Key, std::size_t N, class Compare = std::less<Key>> class set {
+  using container_type = bits::carray<Key, N>;
+  Compare less_than_;
+  container_type keys_;
+
+public:
+  /* container typedefs*/
+  using key_type = Key;
+  using value_type = Key;
+  using size_type = typename container_type::size_type;
+  using difference_type = typename container_type::size_type;
+  using key_compare = Compare;
+  using value_compare = Compare;
+  using reference = typename container_type::const_reference;
+  using const_reference = reference;
+  using pointer = typename container_type::const_pointer;
+  using const_pointer = pointer;
+  using iterator = typename container_type::const_iterator;
+  using reverse_iterator = typename container_type::const_reverse_iterator;
+  using const_iterator = iterator;
+  using const_reverse_iterator = reverse_iterator;
+
+public:
+  /* constructors */
+  constexpr set(const set &other) = default;
+
+  // Stores the comparator first, then sorts the keys with it.
+  constexpr set(container_type keys, Compare const & comp)
+      : less_than_{comp}
+      , keys_(bits::quicksort(keys, less_than_)) {
+      }
+
+  explicit constexpr set(container_type keys)
+      : set{keys, Compare{}} {}
+
+  // The initializer list must contain exactly N elements.
+  constexpr set(std::initializer_list<Key> keys, Compare const & comp)
+      : set{container_type{keys}, comp} {
+        constexpr_assert(keys.size() == N, "Inconsistent initializer_list size and type size argument");
+      }
+
+  constexpr set(std::initializer_list<Key> keys)
+      : set{keys, Compare{}} {}
+
+  /* capacity */
+  constexpr bool empty() const { return !N; }
+  constexpr size_type size() const { return N; }
+  constexpr size_type max_size() const { return N; }
+
+  /* lookup */
+  // Returns the result of bits::binary_search converted to std::size_t.
+  constexpr std::size_t count(Key const &key) const {
+    return bits::binary_search<N>(keys_.begin(), key, less_than_);
+  }
+
+  // Iterator to the element equivalent to key, or end() when absent.
+  constexpr const_iterator find(Key const &key) const {
+    const_iterator where = lower_bound(key);
+    if ((where != end()) && !less_than_(key, *where))
+      return where;
+    else
+      return end();
+  }
+
+  // [first, last) range of elements equivalent to key. For an absent key
+  // this is the empty range located at the key's insertion point.
+  constexpr std::pair<const_iterator, const_iterator> equal_range(Key const &key) const {
+    auto const lower = lower_bound(key);
+    if ((lower != end()) && !less_than_(key, *lower))
+      return {lower, lower + 1};
+    else
+      return {lower, lower};
+  }
+
+  // First element that is not ordered before key, or end() if there is none.
+  // The previous implementation collapsed every non-exact match to end(),
+  // which differs from std::set::lower_bound.
+  // NOTE(review): relies on bits::lower_bound having std::lower_bound
+  // semantics - confirm in frozen/bits/algorithms.h.
+  constexpr const_iterator lower_bound(Key const &key) const {
+    return bits::lower_bound<N>(keys_.begin(), key, less_than_);
+  }
+
+  // First element ordered after key, or end() if there is none.
+  constexpr const_iterator upper_bound(Key const &key) const {
+    auto const where = lower_bound(key);
+    // Step over the single equivalent element when the key is present.
+    if ((where != end()) && !less_than_(key, *where))
+      return where + 1;
+    else
+      return where;
+  }
+
+  /* observers */
+  constexpr key_compare key_comp() const { return less_than_; }
+  constexpr key_compare value_comp() const { return less_than_; }
+
+  /* iterators */
+  constexpr const_iterator begin() const { return keys_.begin(); }
+  constexpr const_iterator cbegin() const { return keys_.cbegin(); }
+  constexpr const_iterator end() const { return keys_.end(); }
+  constexpr const_iterator cend() const { return keys_.cend(); }
+
+  constexpr const_reverse_iterator rbegin() const { return keys_.rbegin(); }
+  constexpr const_reverse_iterator crbegin() const { return keys_.crbegin(); }
+  constexpr const_reverse_iterator rend() const { return keys_.rend(); }
+  constexpr const_reverse_iterator crend() const { return keys_.crend(); }
+
+  /* comparison */
+  constexpr bool operator==(set const& rhs) const { return bits::equal(begin(), end(), rhs.begin()); }
+  constexpr bool operator!=(set const& rhs) const { return !(*this == rhs); }
+  constexpr bool operator<(set const& rhs) const { return bits::lexicographical_compare(begin(), end(), rhs.begin(), rhs.end()); }
+  constexpr bool operator<=(set const& rhs) const { return (*this < rhs) || (*this == rhs); }
+  constexpr bool operator>(set const& rhs) const { return bits::lexicographical_compare(rhs.begin(), rhs.end(), begin(), end()); }
+  constexpr bool operator>=(set const& rhs) const { return (*this > rhs) || (*this == rhs); }
+};
+
+// Specialization for the empty set: no key storage, every lookup misses and
+// every iterator is nullptr.
+template <class Key, class Compare> class set<Key, 0, Compare> {
+  using container_type = bits::carray<Key, 0>; // just for the type definitions
+  Compare less_than_;
+
+public:
+  /* container typedefs*/
+  using key_type = Key;
+  using value_type = Key;
+  using size_type = typename container_type::size_type;
+  using difference_type = typename container_type::size_type;
+  using key_compare = Compare;
+  using value_compare = Compare;
+  using reference = typename container_type::const_reference;
+  using const_reference = reference;
+  using pointer = typename container_type::const_pointer;
+  using const_pointer = pointer;
+  using iterator = pointer;
+  using reverse_iterator = pointer;
+  using const_iterator = const_pointer;
+  using const_reverse_iterator = const_pointer;
+
+public:
+  /* constructors */
+  constexpr set(const set &other) = default;
+  // Keep the caller's comparator so key_comp() reports it, matching the
+  // initializer_list constructor below.
+  constexpr set(bits::carray<Key, 0>, Compare const &comp)
+      : less_than_{comp} {}
+  explicit constexpr set(bits::carray<Key, 0>) {}
+
+  constexpr set(std::initializer_list<Key>, Compare const &comp)
+      : less_than_{comp} {}
+  constexpr set(std::initializer_list<Key> keys) : set{keys, Compare{}} {}
+
+  /* capacity */
+  constexpr bool empty() const { return true; }
+  constexpr size_type size() const { return 0; }
+  constexpr size_type max_size() const { return 0; }
+
+  /* lookup */
+  constexpr std::size_t count(Key const &) const { return 0; }
+
+  constexpr const_iterator find(Key const &) const { return end(); }
+
+  constexpr std::pair<const_iterator, const_iterator>
+  equal_range(Key const &) const { return {end(), end()}; }
+
+  constexpr const_iterator lower_bound(Key const &) const { return end(); }
+
+  constexpr const_iterator upper_bound(Key const &) const { return end(); }
+
+  /* observers */
+  constexpr key_compare key_comp() const { return less_than_; }
+  constexpr key_compare value_comp() const { return less_than_; }
+
+  /* iterators */
+  constexpr const_iterator begin() const { return nullptr; }
+  constexpr const_iterator cbegin() const { return nullptr; }
+  constexpr const_iterator end() const { return nullptr; }
+  constexpr const_iterator cend() const { return nullptr; }
+
+  constexpr const_reverse_iterator rbegin() const { return nullptr; }
+  constexpr const_reverse_iterator crbegin() const { return nullptr; }
+  constexpr const_reverse_iterator rend() const { return nullptr; }
+  constexpr const_reverse_iterator crend() const { return nullptr; }
+};
+
+// Builds an empty set<T, 0>.
+template <typename T>
+constexpr auto make_set(bits::ignored_arg = {}/* for consistency with the initializer below for N = 0*/) {
+  return set<T, 0>{};
+}
+
+// Builds a set<T, N> from a built-in array; N is deduced from the argument.
+template <typename T, std::size_t N>
+constexpr auto make_set(const T (&args)[N]) {
+  return set<T, N>(args);
+}
+
+// Builds a set<T, N> from a std::array; N is deduced from the argument.
+template <typename T, std::size_t N>
+constexpr auto make_set(std::array<T, N> const &args) {
+  return set<T, N>(args);
+}
+
+
+} // namespace frozen
+
+#endif
diff --git a/src/perfect-hashing/frozen/string.h b/src/perfect-hashing/frozen/string.h
new file mode 100644
index 0000000000..1fa07d9f2a
--- /dev/null
+++ b/src/perfect-hashing/frozen/string.h
@@ -0,0 +1,152 @@
+/*
+ * Frozen
+ * Copyright 2016 QuarksLab
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef FROZEN_LETITGO_STRING_H
+#define FROZEN_LETITGO_STRING_H
+
+#include "frozen/bits/elsa.h"
+#include "frozen/bits/version.h"
+#include "frozen/bits/defines.h"
+
+#include <functional>
+
+#ifdef FROZEN_LETITGO_HAS_STRING_VIEW
+#include <string_view>
+#endif
+
+namespace frozen {
+
+// Non-owning constexpr view of a character sequence: a pointer plus a length.
+// The class never copies or frees the characters, so the referenced storage
+// has to outlive every basic_string that points at it.
+template <typename _CharT>
+class basic_string {
+  using chr_t = _CharT;
+
+  chr_t const *data_;
+  std::size_t size_;
+
+public:
+  // From a character array; the last array element (normally the literal's
+  // terminating NUL) is not counted in size().
+  template <std::size_t N>
+  constexpr basic_string(chr_t const (&data)[N])
+      : data_(data), size_(N - 1) {}
+  // From an explicit pointer / length pair.
+  constexpr basic_string(chr_t const *data, std::size_t size)
+      : data_(data), size_(size) {}
+
+#ifdef FROZEN_LETITGO_HAS_STRING_VIEW
+  constexpr basic_string(std::basic_string_view<chr_t> data)
+      : data_(data.data()), size_(data.size()) {}
+#endif
+
+  constexpr basic_string(const basic_string &) noexcept = default;
+  constexpr basic_string &operator=(const basic_string &) noexcept = default;
+
+  constexpr std::size_t size() const { return size_; }
+
+  // Unchecked element access.
+  constexpr chr_t operator[](std::size_t i) const { return data_[i]; }
+
+  // Equal when both lengths and every character match.
+  constexpr bool operator==(basic_string other) const {
+    if (size_ != other.size_)
+      return false;
+    for (std::size_t i = 0; i < size_; ++i)
+      if (data_[i] != other.data_[i])
+        return false;
+    return true;
+  }
+
+  // Lexicographic order; when one string is a prefix of the other, the
+  // shorter one sorts first.
+  constexpr bool operator<(const basic_string &other) const {
+    // std::size_t rather than unsigned: the index is compared against size().
+    std::size_t i = 0;
+    while (i < size() && i < other.size()) {
+      if ((*this)[i] < other[i]) {
+        return true;
+      }
+      if ((*this)[i] > other[i]) {
+        return false;
+      }
+      ++i;
+    }
+    return size() < other.size();
+  }
+
+  constexpr const chr_t *data() const { return data_; }
+};
+
+// Hash functor for basic_string: an unseeded overload and a seeded overload.
+template <typename _CharT> struct elsa<basic_string<_CharT>> {
+  // Unseeded hash: starts at 5381 and folds each character in as d * 33 + c.
+  constexpr std::size_t operator()(basic_string<_CharT> value) const {
+    std::size_t d = 5381;
+    for (std::size_t i = 0; i < value.size(); ++i)
+      d = d * 33 + static_cast<std::size_t>(value[i]);
+    return d;
+  }
+  // https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
+  // With the lowest bits removed, based on experimental setup.
+  constexpr std::size_t operator()(basic_string<_CharT> value, std::size_t seed) const {
+    std::size_t d = (0x811c9dc5 ^ seed) * static_cast<std::size_t>(0x01000193);
+    for (std::size_t i = 0; i < value.size(); ++i)
+      d = (d ^ static_cast<std::size_t>(value[i])) * static_cast<std::size_t>(0x01000193);
+    return d >> 8 ;
+  }
+};
+
+using string = basic_string<char>;
+using wstring = basic_string<wchar_t>;
+using u16string = basic_string<char16_t>;
+using u32string = basic_string<char32_t>;
+
+#ifdef FROZEN_LETITGO_HAS_CHAR8T
+using u8string = basic_string<char8_t>;
+#endif
+
+namespace string_literals {
+
+// "text"_s literals, one overload per character type. Written without a space
+// between "" and the suffix because the spaced form is deprecated.
+constexpr string operator""_s(const char *data, std::size_t size) {
+  return {data, size};
+}
+
+constexpr wstring operator""_s(const wchar_t *data, std::size_t size) {
+  return {data, size};
+}
+
+constexpr u16string operator""_s(const char16_t *data, std::size_t size) {
+  return {data, size};
+}
+
+constexpr u32string operator""_s(const char32_t *data, std::size_t size) {
+  return {data, size};
+}
+
+#ifdef FROZEN_LETITGO_HAS_CHAR8T
+constexpr u8string operator""_s(const char8_t *data, std::size_t size) {
+  return {data, size};
+}
+#endif
+
+} // namespace string_literals
+
+} // namespace frozen
+
+namespace std {
+// std::hash support: forwards to the unseeded elsa overload.
+template <typename _CharT> struct hash<frozen::basic_string<_CharT>> {
+  size_t operator()(frozen::basic_string<_CharT> s) const {
+    return frozen::elsa<frozen::basic_string<_CharT>>{}(s);
+  }
+};
+} // namespace std
+
+#endif
diff --git a/src/perfect-hashing/frozen/unordered_map.h b/src/perfect-hashing/frozen/unordered_map.h
new file mode 100644
index 0000000000..4c6cc582e3
--- /dev/null
+++ b/src/perfect-hashing/frozen/unordered_map.h
@@ -0,0 +1,202 @@
+/*
+ * Frozen
+ * Copyright 2016 QuarksLab
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef FROZEN_LETITGO_UNORDERED_MAP_H
+#define FROZEN_LETITGO_UNORDERED_MAP_H
+
+#include "frozen/bits/basic_types.h"
+#include "frozen/bits/constexpr_assert.h"
+#include "frozen/bits/elsa.h"
+#include "frozen/bits/exceptions.h"
+#include "frozen/bits/pmh.h"
+#include "frozen/bits/version.h"
+#include "frozen/random.h"
+
+#include <tuple>
+#include <functional>
+
+namespace frozen {
+
+namespace bits {
+
+// Key extractor handed to make_pmh_tables: returns the .first member of a
+// stored key/value pair.
+struct GetKey {
+  template <class KV> constexpr auto const &operator()(KV const &kv) const {
+    return kv.first;
+  }
+};
+
+} // namespace bits
+
+// Fixed-size hash map holding exactly N key/value pairs.
+// The constructor builds lookup tables over the stored pairs with
+// bits::make_pmh_tables. A lookup maps any key to one slot of items_, so each
+// accessor confirms the hit with KeyEqual before reporting it.
+// NOTE(review): the default Hash argument is restored as elsa<Key>, the hash
+// template that string.h specializes after including "frozen/bits/elsa.h" -
+// confirm against the upstream header.
+template <class Key, class Value, std::size_t N, typename Hash = elsa<Key>,
+          class KeyEqual = std::equal_to<Key>>
+class unordered_map {
+  static constexpr std::size_t storage_size =
+      bits::next_highest_power_of_two(N) * (N < 32 ? 2 : 1); // size adjustment to prevent high collision rate for small sets
+  using container_type = bits::carray<std::pair<Key, Value>, N>;
+  using tables_type = bits::pmh_tables<storage_size, Hash>;
+
+  KeyEqual const equal_;
+  container_type items_;
+  tables_type tables_;
+
+public:
+  /* typedefs */
+  using Self = unordered_map<Key, Value, N, Hash, KeyEqual>;
+  using key_type = Key;
+  using mapped_type = Value;
+  using value_type = typename container_type::value_type;
+  using size_type = typename container_type::size_type;
+  using difference_type = typename container_type::difference_type;
+  using hasher = Hash;
+  using key_equal = KeyEqual;
+  using reference = typename container_type::reference;
+  using const_reference = typename container_type::const_reference;
+  using pointer = typename container_type::pointer;
+  using const_pointer = typename container_type::const_pointer;
+  using iterator = typename container_type::iterator;
+  using const_iterator = typename container_type::const_iterator;
+
+public:
+  /* constructors */
+  unordered_map(unordered_map const &) = default;
+  // tables_ is built from items_ (the member copy), so the member declaration
+  // order above matters.
+  constexpr unordered_map(container_type items,
+                          Hash const &hash, KeyEqual const &equal)
+      : equal_{equal}
+      , items_{items}
+      , tables_{
+            bits::make_pmh_tables<storage_size>(
+                items_, hash, bits::GetKey{}, default_prg_t{})} {}
+  explicit constexpr unordered_map(container_type items)
+      : unordered_map{items, Hash{}, KeyEqual{}} {}
+
+  // The initializer list must contain exactly N pairs.
+  constexpr unordered_map(std::initializer_list<value_type> items,
+                          Hash const & hash, KeyEqual const & equal)
+      : unordered_map{container_type{items}, hash, equal} {
+        constexpr_assert(items.size() == N, "Inconsistent initializer_list size and type size argument");
+      }
+
+  constexpr unordered_map(std::initializer_list<value_type> items)
+      : unordered_map{items, Hash{}, KeyEqual{}} {}
+
+  /* iterators */
+  constexpr iterator begin() { return items_.begin(); }
+  constexpr iterator end() { return items_.end(); }
+  constexpr const_iterator begin() const { return items_.begin(); }
+  constexpr const_iterator end() const { return items_.end(); }
+  constexpr const_iterator cbegin() const { return items_.cbegin(); }
+  constexpr const_iterator cend() const { return items_.cend(); }
+
+  /* capacity */
+  constexpr bool empty() const { return !N; }
+  constexpr size_type size() const { return N; }
+  constexpr size_type max_size() const { return N; }
+
+  /* lookup */
+  // 1 when the slot selected for key holds an equal key, otherwise 0.
+  constexpr std::size_t count(Key const &key) const {
+    auto const &kv = lookup(key);
+    return equal_(kv.first, key);
+  }
+
+  // Mapped value for key; an unknown key ends in FROZEN_THROW_OR_ABORT with
+  // std::out_of_range.
+  constexpr Value const &at(Key const &key) const {
+    return at_impl(*this, key);
+  }
+  constexpr Value &at(Key const &key) {
+    return at_impl(*this, key);
+  }
+
+  // Iterator to the pair stored under key, or end() when absent.
+  constexpr const_iterator find(Key const &key) const {
+    return find_impl(*this, key);
+  }
+  constexpr iterator find(Key const &key) {
+    return find_impl(*this, key);
+  }
+
+  // One-element range for a present key, {end(), end()} otherwise.
+  constexpr std::pair<const_iterator, const_iterator> equal_range(Key const &key) const {
+    return equal_range_impl(*this, key);
+  }
+  constexpr std::pair<iterator, iterator> equal_range(Key const &key) {
+    return equal_range_impl(*this, key);
+  }
+
+  /* bucket interface */
+  constexpr std::size_t bucket_count() const { return storage_size; }
+  constexpr std::size_t max_bucket_count() const { return storage_size; }
+
+  /* observers*/
+  constexpr hasher hash_function() const { return tables_.hash_; }
+  constexpr key_equal key_eq() const { return equal_; }
+
+private:
+  // The *_impl helpers take the map as a deduced parameter so one body serves
+  // both the const and the non-const public overloads.
+  template <class This>
+  static inline constexpr auto& at_impl(This&& self, Key const &key) {
+    auto& kv = self.lookup(key);
+    if (self.equal_(kv.first, key))
+      return kv.second;
+    else
+      FROZEN_THROW_OR_ABORT(std::out_of_range("unknown key"));
+  }
+
+  template <class This>
+  static inline constexpr auto find_impl(This&& self, Key const &key) {
+    auto& kv = self.lookup(key);
+    if (self.equal_(kv.first, key))
+      return &kv;
+    else
+      return self.items_.end();
+  }
+
+  template <class This>
+  static inline constexpr auto equal_range_impl(This&& self, Key const &key) {
+    auto& kv = self.lookup(key);
+    using kv_ptr = decltype(&kv);
+    if (self.equal_(kv.first, key))
+      return std::pair<kv_ptr, kv_ptr>{&kv, &kv + 1};
+    else
+      return std::pair<kv_ptr, kv_ptr>{self.items_.end(), self.items_.end()};
+  }
+
+  // Returns the slot the tables select for key. The slot may hold a different
+  // key, so callers must compare before trusting it.
+  template <class This>
+  static inline constexpr auto& lookup_impl(This&& self, Key const &key) {
+    return self.items_[self.tables_.lookup(key)];
+  }
+
+  constexpr auto const& lookup(Key const &key) const {
+    return lookup_impl(*this, key);
+  }
+  constexpr auto& lookup(Key const &key) {
+    return lookup_impl(*this, key);
+  }
+};
+
+// Builds an unordered_map<T, U, N> from a built-in array of pairs; N is
+// deduced from the argument.
+template <typename T, typename U, std::size_t N>
+constexpr auto make_unordered_map(std::pair<T, U> const (&items)[N]) {
+  return unordered_map<T, U, N>{items};
+}
+
+// Builds an unordered_map<T, U, N> from a std::array of pairs.
+template <typename T, typename U, std::size_t N>
+constexpr auto make_unordered_map(std::array<std::pair<T, U>, N> const &items) {
+  return unordered_map<T, U, N>{items};
+}
+
+} // namespace frozen
+
+#endif
diff --git a/src/perfect-hashing/frozen/unordered_set.h b/src/perfect-hashing/frozen/unordered_set.h
new file mode 100644
index 0000000000..70d786ead3
--- /dev/null
+++ b/src/perfect-hashing/frozen/unordered_set.h
@@ -0,0 +1,152 @@
+/*
+ * Frozen
+ * Copyright 2016 QuarksLab
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef FROZEN_LETITGO_UNORDERED_SET_H
+#define FROZEN_LETITGO_UNORDERED_SET_H
+
+#include "frozen/bits/basic_types.h"
+#include "frozen/bits/constexpr_assert.h"
+#include "frozen/bits/elsa.h"
+#include "frozen/bits/pmh.h"
+#include "frozen/bits/version.h"
+#include "frozen/random.h"
+
+#include <utility>
+
+namespace frozen {
+
+namespace bits {
+
+// Identity key extractor handed to make_pmh_tables: the stored element is
+// itself the key.
+struct Get {
+  template <class T> constexpr T const &operator()(T const &key) const {
+    return key;
+  }
+};
+
+} // namespace bits
+
+// Fixed-size hash set holding exactly N keys.
+// The constructor builds lookup tables over the stored keys with
+// bits::make_pmh_tables. A lookup maps any key to one slot of keys_, so each
+// accessor confirms the hit with KeyEqual before reporting it.
+// NOTE(review): the default Hash argument is restored as elsa<Key>, the hash
+// template provided through "frozen/bits/elsa.h" - confirm against the
+// upstream header.
+template <class Key, std::size_t N, typename Hash = elsa<Key>,
+          class KeyEqual = std::equal_to<Key>>
+class unordered_set {
+  static constexpr std::size_t storage_size =
+      bits::next_highest_power_of_two(N) * (N < 32 ? 2 : 1); // size adjustment to prevent high collision rate for small sets
+  using container_type = bits::carray<Key, N>;
+  using tables_type = bits::pmh_tables<storage_size, Hash>;
+
+  KeyEqual const equal_;
+  container_type keys_;
+  tables_type tables_;
+
+public:
+  /* typedefs */
+  using key_type = Key;
+  using value_type = Key;
+  using size_type = typename container_type::size_type;
+  using difference_type = typename container_type::difference_type;
+  using hasher = Hash;
+  using key_equal = KeyEqual;
+  using const_reference = typename container_type::const_reference;
+  using reference = const_reference;
+  using const_pointer = typename container_type::const_pointer;
+  using pointer = const_pointer;
+  using const_iterator = const_pointer;
+  using iterator = const_iterator;
+
+public:
+  /* constructors */
+  unordered_set(unordered_set const &) = default;
+  // tables_ is built from keys_ (the member copy), so the member declaration
+  // order above matters.
+  constexpr unordered_set(container_type keys, Hash const &hash,
+                          KeyEqual const &equal)
+      : equal_{equal}
+      , keys_{keys}
+      , tables_{bits::make_pmh_tables<storage_size>(
+            keys_, hash, bits::Get{}, default_prg_t{})} {}
+  explicit constexpr unordered_set(container_type keys)
+      : unordered_set{keys, Hash{}, KeyEqual{}} {}
+
+  constexpr unordered_set(std::initializer_list<Key> keys)
+      : unordered_set{keys, Hash{}, KeyEqual{}} {}
+
+  // The initializer list must contain exactly N keys.
+  constexpr unordered_set(std::initializer_list<Key> keys, Hash const & hash, KeyEqual const & equal)
+      : unordered_set{container_type{keys}, hash, equal} {
+        constexpr_assert(keys.size() == N, "Inconsistent initializer_list size and type size argument");
+      }
+
+  /* iterators */
+  constexpr const_iterator begin() const { return keys_.begin(); }
+  constexpr const_iterator end() const { return keys_.end(); }
+  constexpr const_iterator cbegin() const { return keys_.cbegin(); }
+  constexpr const_iterator cend() const { return keys_.cend(); }
+
+  /* capacity */
+  constexpr bool empty() const { return !N; }
+  constexpr size_type size() const { return N; }
+  constexpr size_type max_size() const { return N; }
+
+  /* lookup */
+  // 1 when the slot selected for key holds an equal key, otherwise 0.
+  constexpr std::size_t count(Key const &key) const {
+    // Bind by reference: lookup() returns a reference into keys_, so there is
+    // no need to copy the stored key just to compare it.
+    auto const &k = lookup(key);
+    return equal_(k, key);
+  }
+  // Iterator to the stored key equal to key, or end() when absent.
+  constexpr const_iterator find(Key const &key) const {
+    auto const &k = lookup(key);
+    if (equal_(k, key))
+      return &k;
+    else
+      return keys_.end();
+  }
+
+  // One-element range for a present key, {end(), end()} otherwise.
+  constexpr std::pair<const_iterator, const_iterator> equal_range(Key const &key) const {
+    auto const &k = lookup(key);
+    if (equal_(k, key))
+      return {&k, &k + 1};
+    else
+      return {keys_.end(), keys_.end()};
+  }
+
+  /* bucket interface */
+  constexpr std::size_t bucket_count() const { return storage_size; }
+  constexpr std::size_t max_bucket_count() const { return storage_size; }
+
+  /* observers*/
+  constexpr hasher hash_function() const { return tables_.hash_; }
+  constexpr key_equal key_eq() const { return equal_; }
+
+private:
+  // Returns the slot the tables select for key. The slot may hold a different
+  // key, so callers must compare before trusting it.
+  constexpr auto const &lookup(Key const &key) const {
+    return keys_[tables_.lookup(key)];
+  }
+};
+
+// Builds an unordered_set<T, N> from a built-in array; N is deduced from the
+// argument.
+template <typename T, std::size_t N>
+constexpr auto make_unordered_set(T const (&keys)[N]) {
+  return unordered_set<T, N>{keys};
+}
+
+// Builds an unordered_set<T, N> from a std::array.
+template <typename T, std::size_t N>
+constexpr auto make_unordered_set(std::array<T, N> const &keys) {
+  return unordered_set<T, N>{keys};
+}
+
+} // namespace frozen
+
+#endif