diff --git a/resources/images/news/joop.png b/resources/images/news/joop.png new file mode 100644 index 0000000000..0ea5e422e1 Binary files /dev/null and b/resources/images/news/joop.png differ diff --git a/resources/images/news/nrcnext.png b/resources/images/news/nrcnext.png new file mode 100644 index 0000000000..1349755925 Binary files /dev/null and b/resources/images/news/nrcnext.png differ diff --git a/resources/quick_start.epub b/resources/quick_start.epub new file mode 100644 index 0000000000..a70f9f13ec Binary files /dev/null and b/resources/quick_start.epub differ diff --git a/resources/recipes/fokkeensukke.recipe b/resources/recipes/fokkeensukke.recipe index 3ddbe1cfe5..76a4aa39b9 100644 --- a/resources/recipes/fokkeensukke.recipe +++ b/resources/recipes/fokkeensukke.recipe @@ -1,23 +1,29 @@ -#!/usr/bin/python from calibre.web.feeds.news import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import Tag +from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag class FokkeEnSukkeRecipe(BasicNewsRecipe) : __license__ = 'GPL v3' __author__ = 'kwetal' language = 'nl' - description = u'Popular Dutch daily cartoon Fokke en Sukke' + country = 'NL' + version = 2 title = u'Fokke en Sukke' - no_stylesheets = True - # For reasons unknown to me the extra css is, on the cartoon pages, inserted in the and not in the . My reader (Sony PRS-600) has a serious issue - # with that: it treats it as content and displays it as is. Setting this property to empty solves this for me. - template_css = '' - INDEX = u'http://foksuk.nl' + publisher = u'Reid, Geleijnse & Van Tol' + category = u'News, Cartoons' + description = u'Popular Dutch daily cartoon Fokke en Sukke' - # This cover is not as nice as it could be, needs some work - #cover_url = 'http://foksuk.nl/content/wysiwyg/simpleimages/image350.gif' + conversion_options = {'comments': description, 'language': language, 'publisher': publisher} + + no_stylesheets = True + extra_css = ''' + body{font-family: verdana, arial, helvetica, geneva, sans-serif ; margin: 0em; padding: 0em;} + div.title {text-align: center; margin-bottom: 1em;} + ''' + + INDEX = u'http://foksuk.nl' + cover_url = 'http://foksuk.nl/content/wysiwyg/simpleimages/image350.gif' keep_only_tags = [dict(name='div', attrs={'class' : 'cartoon'})] @@ -31,15 +37,14 @@ class FokkeEnSukkeRecipe(BasicNewsRecipe) : links = index.findAll('a') maxIndex = len(links) - 1 articles = [] - for i in range(len(links)) : - # The first link does not interest us, as it points to no cartoon. A begin_at parameter in the range() function would be nice. - if i == 0 : - continue - - # There can be more than one cartoon for a given day (currently either one or two). If there's only one, there is just a link with the dayname. - # If there are two, there are three links in sequence: dayname 1 2. In that case we're interested in the last two. + for i in range(1, len(links)) : + # There can be more than one cartoon for a given day (currently either one or two). + # If there's only one, there is just a link with the dayname. + # If there are two, there are three links in sequence: dayname 1 2. + # In that case we're interested in the last two. if links[i].renderContents() in dayNames : - # If the link is not in daynames, we processed it already, but if it is, let's see if the next one has '1' as content + # If the link is not in daynames, we processed it already, but if it is, let's see + # if the next one has '1' as content if (i + 1 <= maxIndex) and (links[i + 1].renderContents() == '1') : # Got you! Add it to the list article = {'title' : links[i].renderContents() + ' 1', 'date' : u'', 'url' : self.INDEX + links[i + 1]['href'], 'description' : ''} @@ -59,29 +64,31 @@ class FokkeEnSukkeRecipe(BasicNewsRecipe) : return [[week, articles]] def preprocess_html(self, soup) : - # This method is called for every page, be it cartoon or TOC. We need to process each in their own way cartoon = soup.find('div', attrs={'class' : 'cartoon'}) - if cartoon : - # It is a cartoon. Extract the title. - title = '' - img = soup.find('img', attrs = {'alt' : True}) - if img : - title = img['alt'] - # Using the 'extra_css' displays it in the and not in the . See comment at the top of this class. Setting the style this way solves that. - tag = Tag(soup, 'div', [('style', 'text-align: center; margin-bottom: 8px')]) - tag.insert(0, title) - cartoon.insert(0, tag) + title = '' + img = soup.find('img', attrs = {'alt' : True}) + if img : + title = img['alt'] - # I have not quite worked out why, but we have to throw out this part of the page. It contains the very same index we processed earlier, - # and Calibre does not like that too much. As far as I can tell it goes into recursion and the result is an empty eBook. - select = cartoon.find('div', attrs={'class' : 'selectcartoon'}) - if select : - select.extract() + tag = Tag(soup, 'div', [('class', 'title')]) + tag.insert(0, title) + cartoon.insert(0, tag) - return cartoon - else : - # It is a TOC. Just return the whole lot. - return soup + # We only want the cartoon, so throw out the index + select = cartoon.find('div', attrs={'class' : 'selectcartoon'}) + if select : + select.extract() + + freshSoup = self.getFreshSoup(soup) + freshSoup.body.append(cartoon) + + return freshSoup + + def getFreshSoup(self, oldSoup): + freshSoup = BeautifulSoup('') + if oldSoup.head.title: + freshSoup.head.title.append(self.tag_to_string(oldSoup.head.title)) + return freshSoup diff --git a/resources/recipes/joop.recipe b/resources/recipes/joop.recipe new file mode 100644 index 0000000000..a913328b9b --- /dev/null +++ b/resources/recipes/joop.recipe @@ -0,0 +1,91 @@ +from calibre.web.feeds.news import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import Tag +import re + +class JoopRecipe(BasicNewsRecipe): + __license__ = 'GPL v3' + __author__ = 'kwetal' + language = 'nl' + country = 'NL' + version = 1 + + title = u'Joop' + publisher = u'Vara' + category = u'News, Politics, Discussion' + description = u'Political blog from the Netherlands' + + oldest_article = 7 + max_articles_per_feed = 100 + use_embedded_content = False + + no_stylesheets = True + remove_javascript = True + + keep_only_tags = [] + keep_only_tags.append(dict(name = 'div', attrs = {'class': 'author_head clearfix photo'})) + keep_only_tags.append(dict(name = 'h2', attrs = {'class': 'columnhead smallline'})) + keep_only_tags.append(dict(name = 'div', attrs = {'class': re.compile('article.*')})) + + extra_css = ''' + body {font-family: verdana, arial, helvetica, geneva, sans-serif;} + img {margin-right: 0.4em;} + h3 {font-size: medium; font-style: italic; font-weight: normal;} + h2 {font-size: xx-large; font-weight: bold} + sub {color: #666666; font-size: x-small; font-weight: normal;} + div.joop_byline {font-size: large} + div.joop_byline_job {font-size: small; color: #696969;} + div.joop_date {font-size: x-small; font-style: italic; margin-top: 0.6em} + ''' + + INDEX = 'http://www.joop.nl' + + conversion_options = {'comments': description, 'tags': category, 'language': language, + 'publisher': publisher} + + def parse_index(self): + sections = ['Politiek', 'Wereld', 'Economie', 'Groen', 'Media', 'Leven', 'Show', 'Opinies'] + soup = self.index_to_soup(self.INDEX) + answer = [] + + div = soup.find('div', attrs = {'id': 'footer'}) + for section in sections: + articles = [] + h2 = div.find(lambda tag: tag.name == 'h2' and tag.renderContents() == section) + if h2: + ul = h2.findNextSibling('ul', 'linklist') + if ul: + for li in ul.findAll('li'): + title = self.tag_to_string(li.a) + url = self.INDEX + li.a['href'] + articles.append({'title': title, 'date': None, 'url': url, 'description': ''}) + + answer.append((section, articles)) + + return answer + + def preprocess_html(self, soup): + div = soup.find('div', 'author_head clearfix photo') + if div: + h2 = soup.find('h2') + if h2: + h2.name = 'div' + h2['class'] = 'joop_byline' + span = h2.find('span') + if span: + span.name = 'div' + span['class'] = 'joop_byline_job' + div.replaceWith(h2) + + h2 = soup.find('h2', attrs = {'class': 'columnhead smallline'}) + if h2: + txt = None + span = h2.find('span', 'info') + if span: + txt = span.find(text = True) + div = Tag(soup, 'div', attrs = [('class', 'joop_date')]) + div.append(txt) + h2.replaceWith(div) + + return soup + + diff --git a/resources/recipes/ledevoir.recipe b/resources/recipes/ledevoir.recipe new file mode 100644 index 0000000000..4612beea2e --- /dev/null +++ b/resources/recipes/ledevoir.recipe @@ -0,0 +1,80 @@ +#!/usr/bin/env python +__license__ = 'GPL v3' +__author__ = 'Lorenzo Vigentini' +__copyright__ = '2009, Lorenzo Vigentini ' +__version__ = 'v1.01' +__date__ = '14, January 2010' +__description__ = 'Canadian Paper ' + +''' +http://www.ledevoir.com/ +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class ledevoir(BasicNewsRecipe): + author = 'Lorenzo Vigentini' + description = 'Canadian Paper' + + cover_url = 'http://www.ledevoir.com/images/ul/graphiques/logo_devoir.gif' + title = u'Le Devoir' + publisher = 'leDevoir.com' + category = 'News, finance, economy, politics' + + language = 'fr' + encoding = 'utf-8' + timefmt = '[%a, %d %b, %Y]' + + oldest_article = 1 + max_articles_per_feed = 50 + use_embedded_content = False + recursion = 10 + + remove_javascript = True + no_stylesheets = True + + keep_only_tags = [ + dict(name='div', attrs={'id':'article'}), + dict(name='ul', attrs={'id':'ariane'}) + ] + + remove_tags = [ + dict(name='div', attrs={'id':'dialog'}), + dict(name='div', attrs={'class':['interesse_actions','reactions']}), + dict(name='ul', attrs={'class':'mots_cles'}), + dict(name='a', attrs={'class':'haut'}), + dict(name='h5', attrs={'class':'interesse_actions'}) + ] + + feeds = [ + (u'A la une', 'http://www.ledevoir.com/rss/manchettes.xml'), + (u'Edition complete', 'http://feeds2.feedburner.com/fluxdudevoir'), + (u'Opinions', 'http://www.ledevoir.com/rss/opinions.xml'), + (u'Chroniques', 'http://www.ledevoir.com/rss/chroniques.xml'), + (u'Politique', 'http://www.ledevoir.com/rss/section/politique.xml?id=51'), + (u'International', 'http://www.ledevoir.com/rss/section/international.xml?id=76'), + (u'Culture', 'http://www.ledevoir.com/rss/section/culture.xml?id=48'), + (u'Environnement', 'http://www.ledevoir.com/rss/section/environnement.xml?id=78'), + (u'Societe', 'http://www.ledevoir.com/rss/section/societe.xml?id=52'), + (u'Economie', 'http://www.ledevoir.com/rss/section/economie.xml?id=49'), + (u'Sports', 'http://www.ledevoir.com/rss/section/sports.xml?id=85'), + (u'Loisirs', 'http://www.ledevoir.com/rss/section/loisirs.xml?id=50') + ] + + extra_css = ''' + h1 {color:#1C1E7C;font-family:Times,Georgia,serif;font-size:1.85em;font-size-adjust:none;font-stretch:normal;font-style:normal;font-variant:normal;font-weight:bold;line-height:1.2em;margin:0 0 5px;} + h2 {color:#333333;font-family:Times,Georgia,serif;font-size:1.5em;font-size-adjust:none;font-stretch:normal;font-style:normal;font-variant:normal;font-weight:normal;line-height:1.2em;margin:0 0 5px;} + h3 {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:15px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px;} + h4 {color:#333333; font-family:Arial,Helvetica,sans-serif;font-size:13px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; } + h5 {color:#333333; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; text-transform:uppercase;} + .specs {line-height:1em;margin:1px 0;} + .specs span.auteur {font:0.85em/1.1em Arial, Verdana, sans-serif;color:#787878;} + .specs span.auteur a, + .specs span.auteur span {text-transform:uppercase;color:#787878;} + .specs .date {font:0.85em/1.1em Arial, Verdana, sans-serif;color:#787878;} + ul#ariane {list-style-type:none;margin:0;padding:5px 0 8px 0;font:0.85em/1.2em Arial, Verdana, sans-serif;color:#2E2E2E;border-bottom:10px solid #fff;} + ul#ariane li {display:inline;} + ul#ariane a {color:#2E2E2E;text-decoration:underline;} + .credit {color:#787878;font-size:0.71em;line-height:1.1em;font-weight:bold;} + .texte {font-size:1.15em;line-height:1.4em;margin-bottom:17px;} + ''' diff --git a/resources/recipes/ncrnext.recipe b/resources/recipes/ncrnext.recipe index d8a51e62c8..e03da301fa 100644 --- a/resources/recipes/ncrnext.recipe +++ b/resources/recipes/ncrnext.recipe @@ -1,29 +1,38 @@ from calibre.web.feeds.news import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import BeautifulSoup +from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag class NrcNextRecipe(BasicNewsRecipe): - __license__ = 'GPL v3' + __license__ = 'GPL v3' __author__ = 'kwetal' - version = 1 language = 'nl' + country = 'NL' + version = 2 + + title = u'nrcnext' + publisher = u'NRC Media' + category = u'News, Opinion, the Netherlands' description = u'Dutch newsblog from the Dutch daily newspaper nrcnext.' - title = u'nrcnext' + + conversion_options = {'comments': description, 'language': language, 'publisher': publisher} no_stylesheets = True - template_css = '' + remove_javascript = True - # I want to do some special processing on the articles. I could not solve it with the 'extra_css' property . So we do it the hard way. keep_only_tags = [dict(name='div', attrs={'id' : 'main'})] - # If that's overkill for you comment out the previous line and uncomment the next. Then get rid of the preprocess_html() method. - #keep_only_tags = [dict(name='div', attrs={'class' : 'post'}), dict(name='div', attrs={'class' : 'vlag'}) ] - remove_tags = [dict(name = 'div', attrs = {'class' : 'meta'}), - dict(name = 'div', attrs = {'class' : 'datumlabel'}), - dict(name = 'ul', attrs = {'class' : 'cats single'}), - dict(name = 'ul', attrs = {'class' : 'cats onderwerpen'}), - dict(name = 'ul', attrs = {'class' : 'cats rubrieken'})] + remove_tags = [] + remove_tags.append(dict(name = 'div', attrs = {'class' : 'meta'})) + remove_tags.append(dict(name = 'div', attrs = {'class' : 'datumlabel'})) + remove_tags.append(dict(name = 'ul', attrs = {'class' : 'cats single'})) + remove_tags.append(dict(name = 'ul', attrs = {'class' : 'cats onderwerpen'})) + remove_tags.append(dict(name = 'ul', attrs = {'class' : 'cats rubrieken'})) - use_embedded_content = False + extra_css = ''' + body {font-family: verdana, arial, helvetica, geneva, sans-serif; text-align: left;} + p.wp-caption-text {font-size: x-small; color: #666666;} + h2.sub_title {font-size: medium; color: #696969;} + h2.vlag {font-size: small; font-weight: bold;} + ''' def parse_index(self) : # Use the wesbite as an index. Their RSS feeds can be out of date. @@ -44,10 +53,11 @@ class NrcNextRecipe(BasicNewsRecipe): # Find the links to the actual articles and rember the location they're pointing to and the title a = post.find('a', attrs={'rel' : 'bookmark'}) href = a['href'] - title = a.renderContents() + title = self.tag_to_string(a) if index == 'columnisten' : - # In this feed/page articles can be written by more than one author. It is nice to see their names in the titles. + # In this feed/page articles can be written by more than one author. + # It is nice to see their names in the titles. flag = post.find('h2', attrs = {'class' : 'vlag'}) author = flag.contents[0].renderContents() completeTitle = u''.join([author, u': ', title]) @@ -71,44 +81,46 @@ class NrcNextRecipe(BasicNewsRecipe): return answer def preprocess_html(self, soup) : - # This method is called for every page, be it cartoon or TOC. We need to process each in their own way - if soup.find('div', attrs = {'id' : 'main', 'class' : 'single'}) : - # It's an article, find the interesting part + if soup.find('div', attrs = {'id' : 'main', 'class' : 'single'}): tag = soup.find('div', attrs = {'class' : 'post'}) - if tag : - # And replace any links with their text, so they don't show up underlined on my reader. - for link in tag.findAll('a') : - link.replaceWith(link.renderContents()) + if tag: + h2 = tag.find('h2', 'vlag') + if h2: + new_h2 = Tag(soup, 'h2', attrs = [('class', 'vlag')]) + new_h2.append(self.tag_to_string(h2)) + h2.replaceWith(new_h2) + else: + h2 = tag.find('h2') + if h2: + new_h2 = Tag(soup, 'h2', attrs = [('class', 'sub_title')]) + new_h2.append(self.tag_to_string(h2)) + h2.replaceWith(new_h2) - # Slows down my Sony reader; feel free to comment out - for movie in tag.findAll('span', attrs = {'class' : 'vvqbox vvqvimeo'}) : + h1 = tag.find('h1') + if h1: + new_h1 = Tag(soup, 'h1') + new_h1.append(self.tag_to_string(h1)) + h1.replaceWith(new_h1) + + # Slows down my reader. + for movie in tag.findAll('span', attrs = {'class' : 'vvqbox vvqvimeo'}): movie.extract() - for movie in tag.findAll('span', attrs = {'class' : 'vvqbox vvqyoutube'}) : + for movie in tag.findAll('span', attrs = {'class' : 'vvqbox vvqyoutube'}): movie.extract() + for iframe in tag.findAll('iframe') : + iframe.extract() - homeMadeSoup = BeautifulSoup('') - body = homeMadeSoup.find('body') - body.append(tag) + fresh_soup = self.getFreshSoup(soup) + fresh_soup.body.append(tag) - return homeMadeSoup - else : + return fresh_soup + else: # This should never happen and other famous last words... return soup - else : - # It's a TOC, return the whole lot. - return soup - - def postproces_html(self, soup) : - # Should not happen, but it does. Slows down my Sony eReader - for img in soup.findAll('img') : - if img['src'].startswith('http://') : - img.extract() - - # Happens for some movies which we are not able to view anyway - for iframe in soup.findAll('iframe') : - if iframe['src'].startswith('http://') : - iframe.extract() - - + def getFreshSoup(self, oldSoup): + freshSoup = BeautifulSoup('') + if oldSoup.head.title: + freshSoup.head.title.append(self.tag_to_string(oldSoup.head.title)) + return freshSoup diff --git a/src/calibre/gui2/__init__.py b/src/calibre/gui2/__init__.py index db4bb5c754..34f9f57161 100644 --- a/src/calibre/gui2/__init__.py +++ b/src/calibre/gui2/__init__.py @@ -10,11 +10,12 @@ from PyQt4.QtGui import QFileDialog, QMessageBox, QPixmap, QFileIconProvider, \ ORG_NAME = 'KovidsBrain' APP_UID = 'libprs500' from calibre import islinux, iswindows, isosx -from calibre.utils.config import Config, ConfigProxy, dynamic +from calibre.utils.config import Config, ConfigProxy, dynamic, JSONConfig from calibre.utils.localization import set_qt_translator from calibre.ebooks.metadata.meta import get_metadata, metadata_from_formats from calibre.ebooks.metadata import MetaInformation +gprefs = JSONConfig('gui') NONE = QVariant() #: Null value to return from the data function of item models diff --git a/src/calibre/gui2/ui.py b/src/calibre/gui2/ui.py index f85a19da24..6cbae7f7b0 100644 --- a/src/calibre/gui2/ui.py +++ b/src/calibre/gui2/ui.py @@ -31,7 +31,7 @@ from calibre.utils.ipc.server import Server from calibre.gui2 import warning_dialog, choose_files, error_dialog, \ question_dialog,\ pixmap_to_data, choose_dir, \ - Dispatcher, \ + Dispatcher, gprefs, \ available_height, \ max_available_height, config, info_dialog, \ available_width, GetMetadata @@ -518,7 +518,21 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI): self.connect(self.library_view.model(), SIGNAL('count_changed(int)'), self.tags_view.recount) self.connect(self.search, SIGNAL('cleared()'), self.tags_view.clear) + if not gprefs.get('quick_start_guide_added', False): + from calibre.ebooks.metadata import MetaInformation + mi = MetaInformation(_('Calibre Quick Start Guide'), ['John Schember']) + mi.author_sort = 'Schember, John' + mi.comments = "A guide to get you up an running with calibre" + mi.publisher = 'calibre' + self.library_view.model().add_books([P('quick_start.epub')], ['epub'], + [mi]) + gprefs['quick_start_guide_added'] = True + self.library_view.model().books_added(1) + if hasattr(self, 'db_images'): + self.db_images.reset() + self.library_view.model().count_changed() + ########################### Cover Flow ################################ self.cover_flow = None if CoverFlow is not None: @@ -1008,7 +1022,6 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI): return self._add_books(books, to_device) - def _add_books(self, paths, to_device, on_card=None): if on_card is None: on_card = 'carda' if self.stack.currentIndex() == 2 else 'cardb' if self.stack.currentIndex() == 3 else None diff --git a/src/calibre/utils/config.py b/src/calibre/utils/config.py index 697cfbe388..a0e5632cb7 100644 --- a/src/calibre/utils/config.py +++ b/src/calibre/utils/config.py @@ -6,7 +6,7 @@ __docformat__ = 'restructuredtext en' ''' Manage application-wide preferences. ''' -import os, re, cPickle, textwrap, traceback, plistlib +import os, re, cPickle, textwrap, traceback, plistlib, json from copy import deepcopy from functools import partial from optparse import OptionParser as _OptionParser @@ -564,23 +564,31 @@ class XMLConfig(dict): data types. ''' + EXTENSION = '.plist' + def __init__(self, rel_path_to_cf_file): dict.__init__(self) self.file_path = os.path.join(config_dir, *(rel_path_to_cf_file.split('/'))) self.file_path = os.path.abspath(self.file_path) - if not self.file_path.endswith('.plist'): - self.file_path += '.plist' + if not self.file_path.endswith(self.EXTENSION): + self.file_path += self.EXTENSION self.refresh() + def raw_to_object(self, raw): + return plistlib.readPlistFromString(raw) + + def to_raw(self): + return plistlib.writePlistToString(self) + def refresh(self): d = {} if os.path.exists(self.file_path): with ExclusiveFile(self.file_path) as f: raw = f.read() try: - d = plistlib.readPlistFromString(raw) if raw.strip() else {} + d = self.raw_to_object(raw) if raw.strip() else {} except SystemError: pass except: @@ -618,11 +626,21 @@ class XMLConfig(dict): if not os.path.exists(dpath): os.makedirs(dpath, mode=CONFIG_DIR_MODE) with ExclusiveFile(self.file_path) as f: - raw = plistlib.writePlistToString(self) + raw = self.to_raw() f.seek(0) f.truncate() f.write(raw) +class JSONConfig(XMLConfig): + + EXTENSION = '.json' + + def raw_to_object(self, raw): + return json.loads(raw.decode('utf-8')) + + def to_raw(self): + return json.dumps(self, indent=2) + def _prefs(): c = Config('global', 'calibre wide preferences')