diff --git a/src/calibre/customize/profiles.py b/src/calibre/customize/profiles.py index b57e15733d..4d8b8a0113 100644 --- a/src/calibre/customize/profiles.py +++ b/src/calibre/customize/profiles.py @@ -242,9 +242,9 @@ class KindleDXOutput(OutputProfile): description = _('This profile is intended for the Amazon Kindle DX.') # Screen size is a best guess - screen_size = (824, 1200) + screen_size = (744, 1022) dpi = 150.0 - comic_screen_size = (741, 1080) + comic_screen_size = (741, 1022) @classmethod def tags_to_string(cls, tags): diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py index e9b9051159..678c21f4ab 100644 --- a/src/calibre/ebooks/conversion/plumber.py +++ b/src/calibre/ebooks/conversion/plumber.py @@ -694,7 +694,7 @@ def create_oebbook(log, path_or_stream, opts, input_plugin, reader=None, ''' from calibre.ebooks.oeb.base import OEBBook html_preprocessor = HTMLPreProcessor(input_plugin.preprocess_html, - opts.preprocess_html) + opts.preprocess_html, getattr(opts, 'pdf_line_length', 0.5)) oeb = OEBBook(log, html_preprocessor, pretty_print=opts.pretty_print, input_encoding=encoding) if not populate: diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py index a508a81b95..82637016cc 100644 --- a/src/calibre/ebooks/conversion/preprocess.py +++ b/src/calibre/ebooks/conversion/preprocess.py @@ -160,9 +160,11 @@ class HTMLPreProcessor(object): (re.compile('<]*?id=subtitle[^><]*?>(.*?)', re.IGNORECASE|re.DOTALL), lambda match : '

%s

'%(match.group(1),)), ] - def __init__(self, input_plugin_preprocess, plugin_preprocess): + def __init__(self, input_plugin_preprocess, plugin_preprocess, + pdf_line_length): self.input_plugin_preprocess = input_plugin_preprocess self.plugin_preprocess = plugin_preprocess + self.pdf_line_length = pdf_line_length def is_baen(self, src): return re.compile(r'') + self.anchor_offset = buffer.tell() # CybookG3 'Start Reading' link if 'text' in self.oeb.guide: href = self.oeb.guide['text'].href diff --git a/src/calibre/ebooks/oeb/transforms/split.py b/src/calibre/ebooks/oeb/transforms/split.py index 0cc17bd14f..4b4fb95fb7 100644 --- a/src/calibre/ebooks/oeb/transforms/split.py +++ b/src/calibre/ebooks/oeb/transforms/split.py @@ -16,7 +16,7 @@ from lxml import etree from lxml.cssselect import CSSSelector from calibre.ebooks.oeb.base import OEB_STYLES, XPNSMAP as NAMESPACES, \ - urldefrag, rewrite_links, urlunquote + urldefrag, rewrite_links, urlunquote, barename from calibre.ebooks.epub import rules XPath = functools.partial(_XPath, namespaces=NAMESPACES) @@ -46,9 +46,10 @@ class Split(object): if self.page_breaks_xpath is not None: self.page_break_selectors = [(XPath(self.page_breaks_xpath), False)] - def __call__(self, oeb, context): + def __call__(self, oeb, opts): self.oeb = oeb self.log = oeb.log + self.opts = opts self.map = {} for item in list(self.oeb.manifest.items): if item.spine_position is not None and etree.iselement(item.data): @@ -62,7 +63,7 @@ class Split(object): page_breaks, page_break_ids = self.find_page_breaks(item) splitter = FlowSplitter(item, page_breaks, page_break_ids, - self.max_flow_size, self.oeb) + self.max_flow_size, self.oeb, self.opts) if splitter.was_split: am = splitter.anchor_map self.map[item.href] = collections.defaultdict( @@ -153,9 +154,11 @@ class Split(object): class FlowSplitter(object): 'The actual splitting logic' - def __init__(self, item, page_breaks, page_break_ids, max_flow_size, oeb): + def __init__(self, item, 
page_breaks, page_break_ids, max_flow_size, oeb, + opts): self.item = item self.oeb = oeb + self.opts = opts self.log = oeb.log self.page_breaks = page_breaks self.page_break_ids = page_break_ids @@ -221,6 +224,34 @@ class FlowSplitter(object): return None return body[0] + def adjust_split_point(self, root, path): + ''' + Move the split point up its ancestor chain if it has no textual content + before it. This handles the common case: +

<div><h2>Chapter 1</h2>...</div>
with a page break on the + h2. + ''' + sp = root.xpath(path)[0] + while True: + parent = sp.getparent() + if barename(parent.tag) in ('body', 'html'): + break + if parent.text and parent.text.strip(): + break + if parent.index(sp) > 0: + break + sp = parent + + npath = sp.getroottree().getpath(sp) + + if self.opts.verbose > 3 and npath != path: + self.log.debug('\t\t\tMoved split point %s to %s'%(path, npath)) + + + return npath + + + def do_split(self, tree, split_point, before): ''' Split ``tree`` into a *before* and *after* tree at ``split_point``, @@ -236,9 +267,11 @@ class FlowSplitter(object): root = tree.getroot() root2 = tree2.getroot() body, body2 = map(self.get_body, (root, root2)) + path = self.adjust_split_point(root, path) split_point = root.xpath(path)[0] split_point2 = root2.xpath(path)[0] + def nix_element(elem, top=True): parent = elem.getparent() index = parent.index(elem) @@ -254,9 +287,12 @@ class FlowSplitter(object): if elem is split_point: hit_split_point = True if before: + x = elem.get('id', None) nix_element(elem) + continue if hit_split_point: + x = elem.get('id', None) nix_element(elem) @@ -266,9 +302,11 @@ class FlowSplitter(object): if elem is split_point2: hit_split_point = True if not before: + x = elem.get('id', None) nix_element(elem, top=False) continue if not hit_split_point: + x = elem.get('id', None) nix_element(elem, top=False) return tree, tree2 diff --git a/src/calibre/ebooks/oeb/transforms/structure.py b/src/calibre/ebooks/oeb/transforms/structure.py index 4e653c212f..468ba61ff0 100644 --- a/src/calibre/ebooks/oeb/transforms/structure.py +++ b/src/calibre/ebooks/oeb/transforms/structure.py @@ -36,8 +36,8 @@ class DetectStructure(object): if self.oeb.toc.count() < 1: if not opts.no_chapters_in_toc and self.detected_chapters: self.create_toc_from_chapters() - if self.oeb.toc.count() < opts.toc_threshold: - self.create_toc_from_links() + if self.oeb.toc.count() < opts.toc_threshold: + self.create_toc_from_links() if 
self.oeb.toc.count() < 2 and orig_toc.count() > 2: self.oeb.toc = orig_toc else: diff --git a/src/calibre/ebooks/pdf/input.py b/src/calibre/ebooks/pdf/input.py index d59c9da61b..08bc1560a3 100644 --- a/src/calibre/ebooks/pdf/input.py +++ b/src/calibre/ebooks/pdf/input.py @@ -20,6 +20,8 @@ class PDFInput(InputFormatPlugin): options = set([ OptionRecommendation(name='no_images', recommended_value=False, help=_('Do not extract images from the document')), + OptionRecommendation(name='pdf_line_length', recommended_value=0.5, + help=_('Average line length for line breaking')), ]) def convert(self, stream, options, file_ext, log, diff --git a/src/calibre/gui2/__init__.py b/src/calibre/gui2/__init__.py index a8b6f2d05b..af4ca16eac 100644 --- a/src/calibre/gui2/__init__.py +++ b/src/calibre/gui2/__init__.py @@ -48,7 +48,8 @@ def _config(): help=_('Defaults for conversion to LRF')) c.add_opt('LRF_ebook_viewer_options', default=None, help=_('Options for the LRF ebook viewer')) - c.add_opt('internally_viewed_formats', default=['LRF', 'EPUB', 'LIT', 'MOBI', 'PRC', 'HTML', 'FB2'], + c.add_opt('internally_viewed_formats', default=['LRF', 'EPUB', 'LIT', + 'MOBI', 'PRC', 'HTML', 'FB2', 'PDB', 'RB'], help=_('Formats that are viewed using the internal viewer')) c.add_opt('column_map', default=ALL_COLUMNS, help=_('Columns to be displayed in the book list')) diff --git a/src/calibre/gui2/dialogs/metadata_bulk.py b/src/calibre/gui2/dialogs/metadata_bulk.py index 7d388b077e..9b8810b3a4 100644 --- a/src/calibre/gui2/dialogs/metadata_bulk.py +++ b/src/calibre/gui2/dialogs/metadata_bulk.py @@ -12,7 +12,7 @@ from calibre.gui2.dialogs.tag_editor import TagEditor from calibre.ebooks.metadata import string_to_authors, authors_to_sort_string class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog): - + def __init__(self, window, rows, db): QDialog.__init__(self, window) Ui_MetadataBulkDialog.__init__(self) @@ -22,33 +22,33 @@ class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog): 
self.write_series = False self.write_rating = False self.changed = False - QObject.connect(self.button_box, SIGNAL("accepted()"), self.sync) + QObject.connect(self.button_box, SIGNAL("accepted()"), self.sync) QObject.connect(self.rating, SIGNAL('valueChanged(int)'), self.rating_changed) - + all_series = self.db.all_series() - + for i in all_series: id, name = i self.series.addItem(name) - + for f in self.db.all_formats(): self.remove_format.addItem(f) - + self.remove_format.setCurrentIndex(-1) - + self.series.lineEdit().setText('') QObject.connect(self.series, SIGNAL('currentIndexChanged(int)'), self.series_changed) QObject.connect(self.series, SIGNAL('editTextChanged(QString)'), self.series_changed) QObject.connect(self.tag_editor_button, SIGNAL('clicked()'), self.tag_editor) self.exec_() - + def tag_editor(self): d = TagEditor(self, self.db, None) d.exec_() if d.result() == QDialog.Accepted: tag_string = ', '.join(d.tags) self.tags.setText(tag_string) - + def sync(self): for id in self.ids: au = qstring_to_unicode(self.authors.text()) @@ -80,14 +80,14 @@ class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog): self.db.set_tags(id, tags, append=True, notify=False) if self.write_series: self.db.set_series(id, qstring_to_unicode(self.series.currentText()), notify=False) - + if self.remove_format.currentIndex() > -1: self.db.remove_format(id, unicode(self.remove_format.currentText()), index_is_id=True, notify=False) - + self.changed = True - + def series_changed(self): self.write_series = True - + def rating_changed(self): - self.write_rating = True \ No newline at end of file + self.write_rating = True diff --git a/src/calibre/gui2/images/news/gva_be.png b/src/calibre/gui2/images/news/gva_be.png new file mode 100644 index 0000000000..ca753a5bbd Binary files /dev/null and b/src/calibre/gui2/images/news/gva_be.png differ diff --git a/src/calibre/gui2/images/news/hln.png b/src/calibre/gui2/images/news/hln.png new file mode 100644 index 0000000000..d7824d4ccc Binary 
files /dev/null and b/src/calibre/gui2/images/news/hln.png differ diff --git a/src/calibre/gui2/images/news/tijd.png b/src/calibre/gui2/images/news/tijd.png new file mode 100644 index 0000000000..fd55a4e5ad Binary files /dev/null and b/src/calibre/gui2/images/news/tijd.png differ diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py index 0c5013880f..730ca364d5 100644 --- a/src/calibre/library/database2.py +++ b/src/calibre/library/database2.py @@ -224,6 +224,10 @@ class ResultCache(SearchQueryParser): id = row if row_is_id else self._map_filtered[row] self._data[id][col] = val + def get(self, row, col, row_is_id=False): + id = row if row_is_id else self._map_filtered[row] + return self._data[id][col] + def index(self, id, cache=False): x = self._map if cache else self._map_filtered return x.index(id) @@ -557,6 +561,35 @@ class LibraryDatabase2(LibraryDatabase): ) + def upgrade_version_6(self): + 'Show authors in order' + self.conn.executescript(''' + BEGIN TRANSACTION; + DROP VIEW meta; + CREATE VIEW meta AS + SELECT id, title, + (SELECT sortconcat(bal.id, name) FROM books_authors_link AS bal JOIN authors ON(author = authors.id) WHERE book = books.id) authors, + (SELECT name FROM publishers WHERE publishers.id IN (SELECT publisher from books_publishers_link WHERE book=books.id)) publisher, + (SELECT rating FROM ratings WHERE ratings.id IN (SELECT rating from books_ratings_link WHERE book=books.id)) rating, + timestamp, + (SELECT MAX(uncompressed_size) FROM data WHERE book=books.id) size, + (SELECT concat(name) FROM tags WHERE tags.id IN (SELECT tag from books_tags_link WHERE book=books.id)) tags, + (SELECT text FROM comments WHERE book=books.id) comments, + (SELECT name FROM series WHERE series.id IN (SELECT series FROM books_series_link WHERE book=books.id)) series, + series_index, + sort, + author_sort, + (SELECT concat(format) FROM data WHERE data.book=books.id) formats, + isbn, + path, + lccn, + pubdate, + flags + FROM books; + END 
TRANSACTION; + ''') + + def last_modified(self): ''' Return last modified time as a UTC datetime object''' @@ -1105,6 +1138,14 @@ class LibraryDatabase2(LibraryDatabase): if notify: self.notify('metadata', [id]) + def get_tags(self, id): + result = self.conn.get( + 'SELECT name FROM tags WHERE id IN (SELECT tag FROM books_tags_link WHERE book=?)', + (id,), all=True) + if not result: + return set([]) + return set([r[0] for r in result]) + def set_tags(self, id, tags, append=False, notify=True): ''' @param tags: list of strings @@ -1113,7 +1154,8 @@ class LibraryDatabase2(LibraryDatabase): if not append: self.conn.execute('DELETE FROM books_tags_link WHERE book=?', (id,)) self.conn.execute('DELETE FROM tags WHERE (SELECT COUNT(id) FROM books_tags_link WHERE tag=tags.id) < 1') - for tag in set(tags): + otags = self.get_tags(id) + for tag in (set(tags)-otags): tag = tag.strip() if not tag: continue @@ -1138,13 +1180,7 @@ class LibraryDatabase2(LibraryDatabase): self.conn.execute('INSERT INTO books_tags_link(book, tag) VALUES (?,?)', (id, tid)) self.conn.commit() - try: - otags = [t.strip() for t in self.data[self.data.row(id)][FIELD_MAP['tags']].split(',')] - except AttributeError: - otags = [] - if not append: - otags = [] - tags = ','.join(otags+tags) + tags = ','.join(self.get_tags(id)) self.data.set(id, FIELD_MAP['tags'], tags, row_is_id=True) if notify: self.notify('metadata', [id]) diff --git a/src/calibre/library/sqlite.py b/src/calibre/library/sqlite.py index 58a1cd8d91..f874796ade 100644 --- a/src/calibre/library/sqlite.py +++ b/src/calibre/library/sqlite.py @@ -40,10 +40,10 @@ def convert_timestamp(val): if tz is not None: h, m = map(int, tz.split(':')) delta = timedelta(minutes=mult*(60*h + m)) - tz = type('CustomTZ', (tzinfo,), {'utcoffset':lambda self, dt:delta, + tz = type('CustomTZ', (tzinfo,), {'utcoffset':lambda self, dt:delta, 'dst':lambda self,dt:timedelta(0)})() - val = datetime(year, month, day, hours, minutes, seconds, microseconds, + val = 
datetime(year, month, day, hours, minutes, seconds, microseconds, tzinfo=tz) if tz is not None: val = datetime(*(val.utctimetuple()[:6])) @@ -61,11 +61,11 @@ class Concatenate(object): def __init__(self, sep=','): self.sep = sep self.ans = '' - + def step(self, value): if value is not None: self.ans += value + self.sep - + def finalize(self): if not self.ans: return None @@ -73,8 +73,23 @@ class Concatenate(object): return self.ans[:-len(self.sep)] return self.ans +class SortedConcatenate(object): + '''String concatenation aggregator for sqlite, sorted by supplied index''' + def __init__(self, sep=','): + self.sep = sep + self.ans = {} + + def step(self, ndx, value): + if value is not None: + self.ans[ndx] = value + + def finalize(self): + if len(self.ans) == 0: + return None + return self.sep.join(map(self.ans.get, sorted(self.ans.keys()))) + class Connection(sqlite.Connection): - + def get(self, *args, **kw): ans = self.execute(*args) if not kw.get('all', True): @@ -83,12 +98,12 @@ class Connection(sqlite.Connection): ans = [None] return ans[0] return ans.fetchall() - + class DBThread(Thread): - + CLOSE = '-------close---------' - + def __init__(self, path, row_factory): Thread.__init__(self) self.setDaemon(True) @@ -98,14 +113,15 @@ class DBThread(Thread): self.requests = Queue(1) self.results = Queue(1) self.conn = None - + def connect(self): - self.conn = sqlite.connect(self.path, factory=Connection, + self.conn = sqlite.connect(self.path, factory=Connection, detect_types=sqlite.PARSE_DECLTYPES|sqlite.PARSE_COLNAMES) self.conn.row_factory = sqlite.Row if self.row_factory else lambda cursor, row : list(row) self.conn.create_aggregate('concat', 1, Concatenate) + self.conn.create_aggregate('sortconcat', 2, SortedConcatenate) self.conn.create_function('title_sort', 1, title_sort) - + def run(self): try: self.connect() @@ -124,7 +140,7 @@ class DBThread(Thread): self.unhandled_error = (err, traceback.format_exc()) class DatabaseException(Exception): - + def 
__init__(self, err, tb): tb = '\n\t'.join(('\tRemote'+tb).splitlines()) msg = unicode(err) +'\n' + tb @@ -146,41 +162,41 @@ def proxy(fn): raise DatabaseException(*res) return res return run - + class ConnectionProxy(object): - + def __init__(self, proxy): self.proxy = proxy - + def close(self): if self.proxy.unhandled_error is None: self.proxy.requests.put((self.proxy.CLOSE, [], {})) - + @proxy def get(self, query, all=True): pass - - @proxy + + @proxy def commit(self): pass - + @proxy def execute(self): pass - + @proxy def executemany(self): pass - + @proxy def executescript(self): pass - + @proxy def create_aggregate(self): pass - + @proxy def create_function(self): pass - + @proxy def cursor(self): pass - + def connect(dbpath, row_factory=None): conn = ConnectionProxy(DBThread(dbpath, row_factory)) conn.proxy.start() @@ -188,4 +204,4 @@ def connect(dbpath, row_factory=None): time.sleep(0.01) if conn.proxy.unhandled_error[0] is not None: raise DatabaseException(*conn.proxy.unhandled_error) - return conn \ No newline at end of file + return conn diff --git a/src/calibre/trac/plugins/templates/linux.html b/src/calibre/trac/plugins/templates/linux.html index ffaa1d8394..b948dccc74 100644 --- a/src/calibre/trac/plugins/templates/linux.html +++ b/src/calibre/trac/plugins/templates/linux.html @@ -140,6 +140,11 @@ sudo calibre_postinstall
+

Note

+

+ If your kernel is compiled with CONFIG_SYSFS_DEPRECATED, device detection may not work. +

+

Dependencies

${app} has the following dependencies (the listed version is the minimum version)

diff --git a/src/calibre/web/feeds/recipes/__init__.py b/src/calibre/web/feeds/recipes/__init__.py index 2fd1c74ba8..af776493f8 100644 --- a/src/calibre/web/feeds/recipes/__init__.py +++ b/src/calibre/web/feeds/recipes/__init__.py @@ -50,6 +50,7 @@ recipe_modules = ['recipe_' + r for r in ( 'marca', 'kellog_faculty', 'kellog_insight', 'theeconomictimes_india', '7dias', 'buenosaireseconomico', 'diagonales', 'miradasalsur', 'newsweek_argentina', 'veintitres', + 'gva_be', 'hln', 'tijd', 'degentenaar', )] import re, imp, inspect, time, os diff --git a/src/calibre/web/feeds/recipes/recipe_degentenaar.py b/src/calibre/web/feeds/recipes/recipe_degentenaar.py new file mode 100644 index 0000000000..03b324cc2d --- /dev/null +++ b/src/calibre/web/feeds/recipes/recipe_degentenaar.py @@ -0,0 +1,75 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2009, Darko Miletic ' +''' +www.nieuwsblad.be +''' +from calibre.web.feeds.news import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import Tag + +class DeGentenaarOnline(BasicNewsRecipe): + title = 'De Gentenaar Online' + __author__ = 'Darko Miletic' + description = 'News from Belgium in Dutch' + publisher = 'De Gentenaar' + category = 'news, politics, Belgium' + oldest_article = 2 + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + encoding = 'utf-8' + language = _('Dutch') + lang = 'nl-BE' + direction = 'ltr' + + html2lrf_options = [ + '--comment' , description + , '--category' , category + , '--publisher', publisher + ] + + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} "' + + keep_only_tags = [dict(name='span', attrs={'id':['lblArticleTitle','lblArticleIntroduction','lblArticleMainText']})] + remove_tags = [dict(name=['embed','object'])] + + + + feeds = [ + (u'Snelnieuws' , u'http://feeds.nieuwsblad.be/nieuws/snelnieuws' ) + 
,(u'Binnenland' , u'http://feeds.nieuwsblad.be/nieuws/binnenland' ) + ,(u'Buitenland' , u'http://feeds.nieuwsblad.be/nieuwsblad/buitenland' ) + ,(u'Economie' , u'http://feeds.nieuwsblad.be/economie/home' ) + ,(u'Economie' , u'http://feeds.nieuwsblad.be/economie/home' ) + ,(u'Algemeen' , u'http://feeds.nieuwsblad.be/life/algemeen' ) + ,(u'Film' , u'http://feeds.nieuwsblad.be/life/film' ) + ,(u'Boek' , u'http://feeds.nieuwsblad.be/life/boeken' ) + ,(u'Muziek' , u'http://feeds.nieuwsblad.be/life/muziek' ) + ,(u'Podium' , u'http://feeds.nieuwsblad.be/life/podium' ) + ,(u'TV & radio' , u'http://feeds.nieuwsblad.be/life/tv' ) + ] + + def print_version(self, url): + return url.replace('/Detail.aspx?articleid','/PrintArticle.aspx?ArticleID') + + def get_article_url(self, article): + return article.get('guid', None) + + def preprocess_html(self, soup): + del soup.body['onload'] + for item in soup.findAll(style=True): + del item['style'] + for item in soup.findAll('span'): + item.name='div' + if item.has_key('id') and item['id'] == 'lblArticleTitle': + item.name='h3' + + soup.html['lang'] = self.lang + soup.html['dir' ] = self.direction + mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)]) + mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")]) + soup.head.insert(0,mlang) + soup.head.insert(1,mcharset) + return soup + diff --git a/src/calibre/web/feeds/recipes/recipe_gva_be.py b/src/calibre/web/feeds/recipes/recipe_gva_be.py new file mode 100644 index 0000000000..a02807d8d2 --- /dev/null +++ b/src/calibre/web/feeds/recipes/recipe_gva_be.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2009, Darko Miletic ' +''' +www.gva.be +''' +from calibre.web.feeds.news import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import Tag + +class GazetvanAntwerpen(BasicNewsRecipe): + title = 'Gazet van Antwerpen' + __author__ = 'Darko Miletic' + description = 'News from 
Belgium in Dutch' + publisher = 'Gazet van Antwerpen' + category = 'news, politics, Belgium' + oldest_article = 2 + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + encoding = 'utf-8' + language = _('Dutch') + lang = 'nl-BE' + direction = 'ltr' + + html2lrf_options = [ + '--comment' , description + , '--category' , category + , '--publisher', publisher + ] + + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} "' + + keep_only_tags = [dict(name='div', attrs={'id':'article'})] + remove_tags = [ + dict(name=['embed','object']) + , dict (name='div',attrs={'class':['note NotePortrait','note']}) + ] + remove_tags_after = dict(name='span', attrs={'class':'author'}) + + feeds = [ + (u'Overzicht & Blikvanger', u'http://www.gva.be/syndicationservices/artfeedservice.svc/rss/overview/overzicht' ) + ,(u'Stad & Regio' , u'http://www.gva.be/syndicationservices/artfeedservice.svc/rss/mostrecent/stadenregio' ) + ,(u'Economie' , u'http://www.gva.be/syndicationservices/artfeedservice.svc/rss/mostrecent/economie' ) + ,(u'Binnenland' , u'http://www.gva.be/syndicationservices/artfeedservice.svc/rss/mostrecent/binnenland' ) + ,(u'Buitenland' , u'http://www.gva.be/syndicationservices/artfeedservice.svc/rss/mostrecent/buitenland' ) + ,(u'Media & Cultur' , u'http://www.gva.be/syndicationservices/artfeedservice.svc/rss/mostrecent/mediaencultuur') + ,(u'Wetenschap' , u'http://www.gva.be/syndicationservices/artfeedservice.svc/rss/mostrecent/mediaencultuur') + ,(u'Sport' , u'http://www.gva.be/syndicationservices/artfeedservice.svc/rss/mostrecent/sport' ) + ] + + def preprocess_html(self, soup): + del soup.body['onload'] + for item in soup.findAll(style=True): + del item['style'] + soup.html['lang'] = self.lang + soup.html['dir' ] = self.direction + mlang = 
Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)]) + mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")]) + soup.head.insert(0,mlang) + soup.head.insert(1,mcharset) + return soup + diff --git a/src/calibre/web/feeds/recipes/recipe_hln.py b/src/calibre/web/feeds/recipes/recipe_hln.py new file mode 100644 index 0000000000..c2b6b2375a --- /dev/null +++ b/src/calibre/web/feeds/recipes/recipe_hln.py @@ -0,0 +1,52 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2009, Darko Miletic ' +''' +www.hln.be +''' +from calibre.web.feeds.news import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import Tag + +class HLN_be(BasicNewsRecipe): + title = 'Het Belang Van Limburg' + __author__ = 'Darko Miletic' + description = 'News from Belgium in Dutch' + publisher = 'Het Belang Van Limburg' + category = 'news, politics, Belgium' + oldest_article = 2 + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + encoding = 'utf-8' + language = _('Dutch') + lang = 'nl-BE' + direction = 'ltr' + + html2lrf_options = [ + '--comment' , description + , '--category' , category + , '--publisher', publisher + ] + + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} "' + + keep_only_tags = [dict(name='div', attrs={'class':'art_box2'})] + remove_tags = [ + dict(name=['embed','object']) + ] + + feeds = [(u'Alle nieuws', u'http://www.hln.be/rss.xml')] + + def preprocess_html(self, soup): + del soup.body['onload'] + for item in soup.findAll(style=True): + del item['style'] + soup.html['lang'] = self.lang + soup.html['dir' ] = self.direction + mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)]) + mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")]) + soup.head.insert(0,mlang) 
+ soup.head.insert(1,mcharset) + return soup + diff --git a/src/calibre/web/feeds/recipes/recipe_tijd.py b/src/calibre/web/feeds/recipes/recipe_tijd.py new file mode 100644 index 0000000000..7e8fe62449 --- /dev/null +++ b/src/calibre/web/feeds/recipes/recipe_tijd.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2009, Darko Miletic ' +''' +www.tijd.be +''' +from calibre.web.feeds.news import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import Tag + +class DeTijd(BasicNewsRecipe): + title = 'De Tijd' + __author__ = 'Darko Miletic' + description = 'News from Belgium in Dutch' + publisher = 'De Tijd' + category = 'news, politics, Belgium' + oldest_article = 2 + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + encoding = 'utf-8' + language = _('Dutch') + lang = 'nl-BE' + direction = 'ltr' + + html2lrf_options = [ + '--comment' , description + , '--category' , category + , '--publisher', publisher + ] + + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} "' + + keep_only_tags = [dict(name='div', attrs={'id':'lcol'})] + remove_tags = [ + dict(name=['embed','object']) + , dict (name='div',attrs={'id':'art_reactwrap'}) + ] + remove_tags_after = dict(name='div', attrs={'id':'art_author'}) + + feeds = [ + (u'Volledig nieuwsaanbod', u'http://www.tijd.be/rss/nieuws.xml' ) + ,(u'Markten' , u'http://www.tijd.be/rss/markten.xml' ) + ,(u'Ondernemingen' , u'http://www.tijd.be/rss/ondernemingen.xml' ) + ,(u'Chemie-Farma' , u'http://www.tijd.be/rss/chemie_farma.xml' ) + ,(u'Consumptie' , u'http://www.tijd.be/rss/consumptie.xml' ) + ,(u'Diensten' , u'http://www.tijd.be/rss/diensten.xml' ) + ,(u'Energie' , u'http://www.tijd.be/rss/energie.xml' ) + ,(u'Financen' , u'http://www.tijd.be/rss/financien.xml' ) + ,(u'Industrie' , u'http://www.tijd.be/rss/industrie.xml' ) + ,(u'Media' 
, u'http://www.tijd.be/rss/media_telecom.xml' ) + ,(u'Technologie' , u'http://www.tijd.be/rss/technologie.xml' ) + ,(u'Economie & Financien' , u'http://www.tijd.be/rss/economie.xml' ) + ,(u'Binnenland' , u'http://www.tijd.be/rss/binnenland.xml' ) + ,(u'Buitenland' , u'http://www.tijd.be/rss/buitenland.xml' ) + ,(u'De wijde wereld' , u'http://www.tijd.be/rss/cultuur.xml' ) + ] + + def preprocess_html(self, soup): + del soup.body['onload'] + for item in soup.findAll(style=True): + del item['style'] + soup.html['lang'] = self.lang + soup.html['dir' ] = self.direction + mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)]) + mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")]) + soup.head.insert(0,mlang) + soup.head.insert(1,mcharset) + return soup +