mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 02:34:06 -04:00
Sync to trunk.
This commit is contained in:
commit
2b2659d955
BIN
resources/images/news/joop.png
Normal file
BIN
resources/images/news/joop.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 395 B |
BIN
resources/images/news/nrcnext.png
Normal file
BIN
resources/images/news/nrcnext.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 1.7 KiB |
BIN
resources/quick_start.epub
Normal file
BIN
resources/quick_start.epub
Normal file
Binary file not shown.
@ -1,23 +1,29 @@
|
|||||||
#!/usr/bin/python
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import Tag
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
||||||
|
|
||||||
|
|
||||||
class FokkeEnSukkeRecipe(BasicNewsRecipe) :
|
class FokkeEnSukkeRecipe(BasicNewsRecipe) :
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__author__ = 'kwetal'
|
__author__ = 'kwetal'
|
||||||
language = 'nl'
|
language = 'nl'
|
||||||
description = u'Popular Dutch daily cartoon Fokke en Sukke'
|
country = 'NL'
|
||||||
|
version = 2
|
||||||
|
|
||||||
title = u'Fokke en Sukke'
|
title = u'Fokke en Sukke'
|
||||||
no_stylesheets = True
|
publisher = u'Reid, Geleijnse & Van Tol'
|
||||||
# For reasons unknown to me the extra css is, on the cartoon pages, inserted in the <body> and not in the <head>. My reader (Sony PRS-600) has a serious issue
|
category = u'News, Cartoons'
|
||||||
# with that: it treats it as content and displays it as is. Setting this property to empty solves this for me.
|
description = u'Popular Dutch daily cartoon Fokke en Sukke'
|
||||||
template_css = ''
|
|
||||||
INDEX = u'http://foksuk.nl'
|
|
||||||
|
|
||||||
# This cover is not as nice as it could be, needs some work
|
conversion_options = {'comments': description, 'language': language, 'publisher': publisher}
|
||||||
#cover_url = 'http://foksuk.nl/content/wysiwyg/simpleimages/image350.gif'
|
|
||||||
|
no_stylesheets = True
|
||||||
|
extra_css = '''
|
||||||
|
body{font-family: verdana, arial, helvetica, geneva, sans-serif ; margin: 0em; padding: 0em;}
|
||||||
|
div.title {text-align: center; margin-bottom: 1em;}
|
||||||
|
'''
|
||||||
|
|
||||||
|
INDEX = u'http://foksuk.nl'
|
||||||
|
cover_url = 'http://foksuk.nl/content/wysiwyg/simpleimages/image350.gif'
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'class' : 'cartoon'})]
|
keep_only_tags = [dict(name='div', attrs={'class' : 'cartoon'})]
|
||||||
|
|
||||||
@ -31,15 +37,14 @@ class FokkeEnSukkeRecipe(BasicNewsRecipe) :
|
|||||||
links = index.findAll('a')
|
links = index.findAll('a')
|
||||||
maxIndex = len(links) - 1
|
maxIndex = len(links) - 1
|
||||||
articles = []
|
articles = []
|
||||||
for i in range(len(links)) :
|
for i in range(1, len(links)) :
|
||||||
# The first link does not interest us, as it points to no cartoon. A begin_at parameter in the range() function would be nice.
|
# There can be more than one cartoon for a given day (currently either one or two).
|
||||||
if i == 0 :
|
# If there's only one, there is just a link with the dayname.
|
||||||
continue
|
# If there are two, there are three links in sequence: <a>dayname</a> <a>1</a> <a>2</a>.
|
||||||
|
# In that case we're interested in the last two.
|
||||||
# There can be more than one cartoon for a given day (currently either one or two). If there's only one, there is just a link with the dayname.
|
|
||||||
# If there are two, there are three links in sequence: <a>dayname</a> <a>1</a> <a>2</a>. In that case we're interested in the last two.
|
|
||||||
if links[i].renderContents() in dayNames :
|
if links[i].renderContents() in dayNames :
|
||||||
# If the link is not in daynames, we processed it already, but if it is, let's see if the next one has '1' as content
|
# If the link is not in daynames, we processed it already, but if it is, let's see
|
||||||
|
# if the next one has '1' as content
|
||||||
if (i + 1 <= maxIndex) and (links[i + 1].renderContents() == '1') :
|
if (i + 1 <= maxIndex) and (links[i + 1].renderContents() == '1') :
|
||||||
# Got you! Add it to the list
|
# Got you! Add it to the list
|
||||||
article = {'title' : links[i].renderContents() + ' 1', 'date' : u'', 'url' : self.INDEX + links[i + 1]['href'], 'description' : ''}
|
article = {'title' : links[i].renderContents() + ' 1', 'date' : u'', 'url' : self.INDEX + links[i + 1]['href'], 'description' : ''}
|
||||||
@ -59,29 +64,31 @@ class FokkeEnSukkeRecipe(BasicNewsRecipe) :
|
|||||||
return [[week, articles]]
|
return [[week, articles]]
|
||||||
|
|
||||||
def preprocess_html(self, soup) :
|
def preprocess_html(self, soup) :
|
||||||
# This method is called for every page, be it cartoon or TOC. We need to process each in their own way
|
|
||||||
cartoon = soup.find('div', attrs={'class' : 'cartoon'})
|
cartoon = soup.find('div', attrs={'class' : 'cartoon'})
|
||||||
if cartoon :
|
|
||||||
# It is a cartoon. Extract the title.
|
|
||||||
title = ''
|
title = ''
|
||||||
img = soup.find('img', attrs = {'alt' : True})
|
img = soup.find('img', attrs = {'alt' : True})
|
||||||
if img :
|
if img :
|
||||||
title = img['alt']
|
title = img['alt']
|
||||||
|
|
||||||
# Using the 'extra_css' displays it in the <body> and not in the <head>. See comment at the top of this class. Setting the style this way solves that.
|
tag = Tag(soup, 'div', [('class', 'title')])
|
||||||
tag = Tag(soup, 'div', [('style', 'text-align: center; margin-bottom: 8px')])
|
|
||||||
tag.insert(0, title)
|
tag.insert(0, title)
|
||||||
cartoon.insert(0, tag)
|
cartoon.insert(0, tag)
|
||||||
|
|
||||||
# I have not quite worked out why, but we have to throw out this part of the page. It contains the very same index we processed earlier,
|
# We only want the cartoon, so throw out the index
|
||||||
# and Calibre does not like that too much. As far as I can tell it goes into recursion and the result is an empty eBook.
|
|
||||||
select = cartoon.find('div', attrs={'class' : 'selectcartoon'})
|
select = cartoon.find('div', attrs={'class' : 'selectcartoon'})
|
||||||
if select :
|
if select :
|
||||||
select.extract()
|
select.extract()
|
||||||
|
|
||||||
return cartoon
|
freshSoup = self.getFreshSoup(soup)
|
||||||
else :
|
freshSoup.body.append(cartoon)
|
||||||
# It is a TOC. Just return the whole lot.
|
|
||||||
return soup
|
return freshSoup
|
||||||
|
|
||||||
|
def getFreshSoup(self, oldSoup):
    '''
    Return a minimal, empty HTML document that carries over the title of
    ``oldSoup``.

    The skeleton always contains ``<head><title>`` and ``<body>``; callers
    append the content they want to keep to ``body``. The title text is
    copied only when ``oldSoup`` actually has a ``<head>`` with a
    ``<title>``; otherwise the new title is left empty.
    '''
    freshSoup = BeautifulSoup('<html><head><title></title></head><body></body></html>')
    # Attribute access on a soup returns None for a missing tag, so check
    # <head> before reaching for <title> instead of chaining the lookups.
    oldHead = oldSoup.head
    oldTitle = oldHead.title if oldHead is not None else None
    if oldTitle is not None:
        freshSoup.head.title.append(self.tag_to_string(oldTitle))
    return freshSoup
|
||||||
|
|
||||||
|
|
||||||
|
91
resources/recipes/joop.recipe
Normal file
91
resources/recipes/joop.recipe
Normal file
@ -0,0 +1,91 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
from calibre.ebooks.BeautifulSoup import Tag
|
||||||
|
import re
|
||||||
|
|
||||||
|
class JoopRecipe(BasicNewsRecipe):
    '''
    Recipe for Joop (http://www.joop.nl), a Dutch political blog.

    The article index is scraped from the per-section link lists found in
    the ``footer`` div of the front page; article pages are then reduced to
    the author header, the date header and the article body.
    '''
    __license__ = 'GPL v3'
    __author__ = 'kwetal'
    language = 'nl'
    country = 'NL'
    version = 1

    title = u'Joop'
    publisher = u'Vara'
    category = u'News, Politics, Discussion'
    description = u'Political blog from the Netherlands'

    oldest_article = 7
    max_articles_per_feed = 100
    use_embedded_content = False

    no_stylesheets = True
    remove_javascript = True

    keep_only_tags = [
        dict(name='div', attrs={'class': 'author_head clearfix photo'}),
        dict(name='h2', attrs={'class': 'columnhead smallline'}),
        dict(name='div', attrs={'class': re.compile('article.*')}),
    ]

    extra_css = '''
                body {font-family: verdana, arial, helvetica, geneva, sans-serif;}
                img {margin-right: 0.4em;}
                h3 {font-size: medium; font-style: italic; font-weight: normal;}
                h2 {font-size: xx-large; font-weight: bold}
                sub {color: #666666; font-size: x-small; font-weight: normal;}
                div.joop_byline {font-size: large}
                div.joop_byline_job {font-size: small; color: #696969;}
                div.joop_date {font-size: x-small; font-style: italic; margin-top: 0.6em}
                '''

    INDEX = 'http://www.joop.nl'

    conversion_options = {'comments': description, 'tags': category, 'language': language,
                          'publisher': publisher}

    def parse_index(self):
        '''
        Build the list of feeds from the section link lists in the footer.

        Returns a list of ``(section, articles)`` tuples, where ``articles``
        is a list of dicts with the keys ``title``, ``date``, ``url`` and
        ``description``. Sections that are missing from the page, or that
        yield no usable links, are left out.
        '''
        sections = ['Politiek', 'Wereld', 'Economie', 'Groen', 'Media', 'Leven', 'Show', 'Opinies']
        soup = self.index_to_soup(self.INDEX)
        answer = []

        footer = soup.find('div', attrs={'id': 'footer'})
        if footer is None:
            # Without the footer there is nothing to index; returning an
            # empty list beats an AttributeError on None further down.
            return answer

        for section in sections:
            heading = footer.find(lambda tag: tag.name == 'h2' and tag.renderContents() == section)
            if not heading:
                continue
            ul = heading.findNextSibling('ul', 'linklist')
            if not ul:
                continue

            articles = []
            for li in ul.findAll('li'):
                link = li.a
                # Skip list items that hold no usable hyperlink.
                if link is None or not link.get('href'):
                    continue
                articles.append({'title': self.tag_to_string(link), 'date': None,
                                 'url': self.INDEX + link['href'], 'description': ''})

            if articles:
                answer.append((section, articles))

        return answer

    def preprocess_html(self, soup):
        '''
        Turn the author header and the date header into plain, styled divs.

        The author header div is replaced by the first ``<h2>`` of the page,
        renamed to ``div.joop_byline`` (its ``<span>``, if any, becomes
        ``div.joop_byline_job``). The ``columnhead smallline`` heading is
        replaced by a ``div.joop_date`` holding the text of its ``info``
        span. Returns the modified soup.
        '''
        author_head = soup.find('div', 'author_head clearfix photo')
        if author_head:
            # NOTE(review): this takes the first <h2> of the whole document,
            # not of author_head; presumably they coincide on article pages.
            byline = soup.find('h2')
            if byline:
                byline.name = 'div'
                byline['class'] = 'joop_byline'
                job = byline.find('span')
                if job:
                    job.name = 'div'
                    job['class'] = 'joop_byline_job'
                author_head.replaceWith(byline)

        date_head = soup.find('h2', attrs={'class': 'columnhead smallline'})
        if date_head:
            info = date_head.find('span', 'info')
            txt = info.find(text=True) if info else None
            # Only swap the heading when there is a date text to show;
            # appending None to a Tag raises.
            if txt is not None:
                date_div = Tag(soup, 'div', attrs=[('class', 'joop_date')])
                date_div.append(txt)
                date_head.replaceWith(date_div)

        return soup
|
||||||
|
|
||||||
|
|
80
resources/recipes/ledevoir.recipe
Normal file
80
resources/recipes/ledevoir.recipe
Normal file
@ -0,0 +1,80 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__author__ = 'Lorenzo Vigentini'
|
||||||
|
__copyright__ = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
|
||||||
|
__version__ = 'v1.01'
|
||||||
|
__date__ = '14, January 2010'
|
||||||
|
__description__ = 'Canadian Paper '
|
||||||
|
|
||||||
|
'''
|
||||||
|
http://www.ledevoir.com/
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class ledevoir(BasicNewsRecipe):
    '''
    Recipe for Le Devoir (http://www.ledevoir.com/), a French-language
    Canadian newspaper, built from the site's RSS feeds.
    '''
    author = 'Lorenzo Vigentini'
    # The other recipes in this code base credit their author through the
    # ``__author__`` class attribute; set it too, and keep ``author`` for
    # anything that may already read it.
    __author__ = author
    description = 'Canadian Paper'

    cover_url = 'http://www.ledevoir.com/images/ul/graphiques/logo_devoir.gif'
    title = u'Le Devoir'
    publisher = 'leDevoir.com'
    category = 'News, finance, economy, politics'

    language = 'fr'
    encoding = 'utf-8'
    timefmt = '[%a, %d %b, %Y]'

    oldest_article = 1
    max_articles_per_feed = 50
    use_embedded_content = False
    # NOTE(review): 10 levels of link recursion looks high for feed-based
    # articles -- confirm it is intended.
    recursion = 10

    remove_javascript = True
    no_stylesheets = True

    # Keep only the article body and the breadcrumb list.
    keep_only_tags = [
        dict(name='div', attrs={'id':'article'}),
        dict(name='ul', attrs={'id':'ariane'})
    ]

    # Elements dropped from the kept content, selected by id or class.
    remove_tags = [
        dict(name='div', attrs={'id':'dialog'}),
        dict(name='div', attrs={'class':['interesse_actions','reactions']}),
        dict(name='ul', attrs={'class':'mots_cles'}),
        dict(name='a', attrs={'class':'haut'}),
        dict(name='h5', attrs={'class':'interesse_actions'})
    ]

    feeds = [
        (u'A la une', 'http://www.ledevoir.com/rss/manchettes.xml'),
        (u'Edition complete', 'http://feeds2.feedburner.com/fluxdudevoir'),
        (u'Opinions', 'http://www.ledevoir.com/rss/opinions.xml'),
        (u'Chroniques', 'http://www.ledevoir.com/rss/chroniques.xml'),
        (u'Politique', 'http://www.ledevoir.com/rss/section/politique.xml?id=51'),
        (u'International', 'http://www.ledevoir.com/rss/section/international.xml?id=76'),
        (u'Culture', 'http://www.ledevoir.com/rss/section/culture.xml?id=48'),
        (u'Environnement', 'http://www.ledevoir.com/rss/section/environnement.xml?id=78'),
        (u'Societe', 'http://www.ledevoir.com/rss/section/societe.xml?id=52'),
        (u'Economie', 'http://www.ledevoir.com/rss/section/economie.xml?id=49'),
        (u'Sports', 'http://www.ledevoir.com/rss/section/sports.xml?id=85'),
        (u'Loisirs', 'http://www.ledevoir.com/rss/section/loisirs.xml?id=50')
    ]

    extra_css = '''
                h1 {color:#1C1E7C;font-family:Times,Georgia,serif;font-size:1.85em;font-size-adjust:none;font-stretch:normal;font-style:normal;font-variant:normal;font-weight:bold;line-height:1.2em;margin:0 0 5px;}
                h2 {color:#333333;font-family:Times,Georgia,serif;font-size:1.5em;font-size-adjust:none;font-stretch:normal;font-style:normal;font-variant:normal;font-weight:normal;line-height:1.2em;margin:0 0 5px;}
                h3 {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:15px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px;}
                h4 {color:#333333; font-family:Arial,Helvetica,sans-serif;font-size:13px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; }
                h5 {color:#333333; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; text-transform:uppercase;}
                .specs {line-height:1em;margin:1px 0;}
                .specs span.auteur {font:0.85em/1.1em Arial, Verdana, sans-serif;color:#787878;}
                .specs span.auteur a,
                .specs span.auteur span {text-transform:uppercase;color:#787878;}
                .specs .date {font:0.85em/1.1em Arial, Verdana, sans-serif;color:#787878;}
                ul#ariane {list-style-type:none;margin:0;padding:5px 0 8px 0;font:0.85em/1.2em Arial, Verdana, sans-serif;color:#2E2E2E;border-bottom:10px solid #fff;}
                ul#ariane li {display:inline;}
                ul#ariane a {color:#2E2E2E;text-decoration:underline;}
                .credit {color:#787878;font-size:0.71em;line-height:1.1em;font-weight:bold;}
                .texte {font-size:1.15em;line-height:1.4em;margin-bottom:17px;}
                '''
|
@ -1,29 +1,38 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
||||||
|
|
||||||
class NrcNextRecipe(BasicNewsRecipe):
|
class NrcNextRecipe(BasicNewsRecipe):
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__author__ = 'kwetal'
|
__author__ = 'kwetal'
|
||||||
version = 1
|
|
||||||
language = 'nl'
|
language = 'nl'
|
||||||
description = u'Dutch newsblog from the Dutch daily newspaper nrcnext.'
|
country = 'NL'
|
||||||
|
version = 2
|
||||||
|
|
||||||
title = u'nrcnext'
|
title = u'nrcnext'
|
||||||
|
publisher = u'NRC Media'
|
||||||
|
category = u'News, Opinion, the Netherlands'
|
||||||
|
description = u'Dutch newsblog from the Dutch daily newspaper nrcnext.'
|
||||||
|
|
||||||
|
conversion_options = {'comments': description, 'language': language, 'publisher': publisher}
|
||||||
|
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
template_css = ''
|
remove_javascript = True
|
||||||
|
|
||||||
# I want to do some special processing on the articles. I could not solve it with the 'extra_css' property . So we do it the hard way.
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'id' : 'main'})]
|
keep_only_tags = [dict(name='div', attrs={'id' : 'main'})]
|
||||||
# If that's overkill for you comment out the previous line and uncomment the next. Then get rid of the preprocess_html() method.
|
|
||||||
#keep_only_tags = [dict(name='div', attrs={'class' : 'post'}), dict(name='div', attrs={'class' : 'vlag'}) ]
|
|
||||||
|
|
||||||
remove_tags = [dict(name = 'div', attrs = {'class' : 'meta'}),
|
remove_tags = []
|
||||||
dict(name = 'div', attrs = {'class' : 'datumlabel'}),
|
remove_tags.append(dict(name = 'div', attrs = {'class' : 'meta'}))
|
||||||
dict(name = 'ul', attrs = {'class' : 'cats single'}),
|
remove_tags.append(dict(name = 'div', attrs = {'class' : 'datumlabel'}))
|
||||||
dict(name = 'ul', attrs = {'class' : 'cats onderwerpen'}),
|
remove_tags.append(dict(name = 'ul', attrs = {'class' : 'cats single'}))
|
||||||
dict(name = 'ul', attrs = {'class' : 'cats rubrieken'})]
|
remove_tags.append(dict(name = 'ul', attrs = {'class' : 'cats onderwerpen'}))
|
||||||
|
remove_tags.append(dict(name = 'ul', attrs = {'class' : 'cats rubrieken'}))
|
||||||
|
|
||||||
use_embedded_content = False
|
extra_css = '''
|
||||||
|
body {font-family: verdana, arial, helvetica, geneva, sans-serif; text-align: left;}
|
||||||
|
p.wp-caption-text {font-size: x-small; color: #666666;}
|
||||||
|
h2.sub_title {font-size: medium; color: #696969;}
|
||||||
|
h2.vlag {font-size: small; font-weight: bold;}
|
||||||
|
'''
|
||||||
|
|
||||||
def parse_index(self) :
|
def parse_index(self) :
|
||||||
# Use the website as an index. Their RSS feeds can be out of date.
|
# Use the website as an index. Their RSS feeds can be out of date.
|
||||||
@ -44,10 +53,11 @@ class NrcNextRecipe(BasicNewsRecipe):
|
|||||||
# Find the links to the actual articles and remember the location they're pointing to and the title
|
# Find the links to the actual articles and remember the location they're pointing to and the title
|
||||||
a = post.find('a', attrs={'rel' : 'bookmark'})
|
a = post.find('a', attrs={'rel' : 'bookmark'})
|
||||||
href = a['href']
|
href = a['href']
|
||||||
title = a.renderContents()
|
title = self.tag_to_string(a)
|
||||||
|
|
||||||
if index == 'columnisten' :
|
if index == 'columnisten' :
|
||||||
# In this feed/page articles can be written by more than one author. It is nice to see their names in the titles.
|
# In this feed/page articles can be written by more than one author.
|
||||||
|
# It is nice to see their names in the titles.
|
||||||
flag = post.find('h2', attrs = {'class' : 'vlag'})
|
flag = post.find('h2', attrs = {'class' : 'vlag'})
|
||||||
author = flag.contents[0].renderContents()
|
author = flag.contents[0].renderContents()
|
||||||
completeTitle = u''.join([author, u': ', title])
|
completeTitle = u''.join([author, u': ', title])
|
||||||
@ -71,44 +81,46 @@ class NrcNextRecipe(BasicNewsRecipe):
|
|||||||
return answer
|
return answer
|
||||||
|
|
||||||
def preprocess_html(self, soup) :
|
def preprocess_html(self, soup) :
|
||||||
# This method is called for every page, be it cartoon or TOC. We need to process each in their own way
|
|
||||||
if soup.find('div', attrs = {'id' : 'main', 'class' : 'single'}):
|
if soup.find('div', attrs = {'id' : 'main', 'class' : 'single'}):
|
||||||
# It's an article, find the interesting part
|
|
||||||
tag = soup.find('div', attrs = {'class' : 'post'})
|
tag = soup.find('div', attrs = {'class' : 'post'})
|
||||||
if tag:
|
if tag:
|
||||||
# And replace any links with their text, so they don't show up underlined on my reader.
|
h2 = tag.find('h2', 'vlag')
|
||||||
for link in tag.findAll('a') :
|
if h2:
|
||||||
link.replaceWith(link.renderContents())
|
new_h2 = Tag(soup, 'h2', attrs = [('class', 'vlag')])
|
||||||
|
new_h2.append(self.tag_to_string(h2))
|
||||||
|
h2.replaceWith(new_h2)
|
||||||
|
else:
|
||||||
|
h2 = tag.find('h2')
|
||||||
|
if h2:
|
||||||
|
new_h2 = Tag(soup, 'h2', attrs = [('class', 'sub_title')])
|
||||||
|
new_h2.append(self.tag_to_string(h2))
|
||||||
|
h2.replaceWith(new_h2)
|
||||||
|
|
||||||
# Slows down my Sony reader; feel free to comment out
|
h1 = tag.find('h1')
|
||||||
|
if h1:
|
||||||
|
new_h1 = Tag(soup, 'h1')
|
||||||
|
new_h1.append(self.tag_to_string(h1))
|
||||||
|
h1.replaceWith(new_h1)
|
||||||
|
|
||||||
|
# Slows down my reader.
|
||||||
for movie in tag.findAll('span', attrs = {'class' : 'vvqbox vvqvimeo'}):
|
for movie in tag.findAll('span', attrs = {'class' : 'vvqbox vvqvimeo'}):
|
||||||
movie.extract()
|
movie.extract()
|
||||||
for movie in tag.findAll('span', attrs = {'class' : 'vvqbox vvqyoutube'}):
|
for movie in tag.findAll('span', attrs = {'class' : 'vvqbox vvqyoutube'}):
|
||||||
movie.extract()
|
movie.extract()
|
||||||
|
for iframe in tag.findAll('iframe') :
|
||||||
|
iframe.extract()
|
||||||
|
|
||||||
homeMadeSoup = BeautifulSoup('<html><head></head><body></body></html>')
|
fresh_soup = self.getFreshSoup(soup)
|
||||||
body = homeMadeSoup.find('body')
|
fresh_soup.body.append(tag)
|
||||||
body.append(tag)
|
|
||||||
|
|
||||||
return homeMadeSoup
|
return fresh_soup
|
||||||
else:
|
else:
|
||||||
# This should never happen and other famous last words...
|
# This should never happen and other famous last words...
|
||||||
return soup
|
return soup
|
||||||
else :
|
|
||||||
# It's a TOC, return the whole lot.
|
|
||||||
return soup
|
|
||||||
|
|
||||||
def postproces_html(self, soup) :
|
|
||||||
# Should not happen, but it does. Slows down my Sony eReader
|
|
||||||
for img in soup.findAll('img') :
|
|
||||||
if img['src'].startswith('http://') :
|
|
||||||
img.extract()
|
|
||||||
|
|
||||||
# Happens for some movies which we are not able to view anyway
|
|
||||||
for iframe in soup.findAll('iframe') :
|
|
||||||
if iframe['src'].startswith('http://') :
|
|
||||||
iframe.extract()
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def getFreshSoup(self, oldSoup):
    '''
    Return a minimal, empty HTML document that carries over the title of
    ``oldSoup``.

    The skeleton always contains ``<head><title>`` and ``<body>``; callers
    append the content they want to keep to ``body``. The title text is
    copied only when ``oldSoup`` actually has a ``<head>`` with a
    ``<title>``; otherwise the new title is left empty.
    '''
    freshSoup = BeautifulSoup('<html><head><title></title></head><body></body></html>')
    # Attribute access on a soup returns None for a missing tag, so check
    # <head> before reaching for <title> instead of chaining the lookups.
    oldHead = oldSoup.head
    oldTitle = oldHead.title if oldHead is not None else None
    if oldTitle is not None:
        freshSoup.head.title.append(self.tag_to_string(oldTitle))
    return freshSoup
|
||||||
|
|
||||||
|
@ -10,11 +10,12 @@ from PyQt4.QtGui import QFileDialog, QMessageBox, QPixmap, QFileIconProvider, \
|
|||||||
ORG_NAME = 'KovidsBrain'
|
ORG_NAME = 'KovidsBrain'
|
||||||
APP_UID = 'libprs500'
|
APP_UID = 'libprs500'
|
||||||
from calibre import islinux, iswindows, isosx
|
from calibre import islinux, iswindows, isosx
|
||||||
from calibre.utils.config import Config, ConfigProxy, dynamic
|
from calibre.utils.config import Config, ConfigProxy, dynamic, JSONConfig
|
||||||
from calibre.utils.localization import set_qt_translator
|
from calibre.utils.localization import set_qt_translator
|
||||||
from calibre.ebooks.metadata.meta import get_metadata, metadata_from_formats
|
from calibre.ebooks.metadata.meta import get_metadata, metadata_from_formats
|
||||||
from calibre.ebooks.metadata import MetaInformation
|
from calibre.ebooks.metadata import MetaInformation
|
||||||
|
|
||||||
|
gprefs = JSONConfig('gui')
|
||||||
|
|
||||||
NONE = QVariant() #: Null value to return from the data function of item models
|
NONE = QVariant() #: Null value to return from the data function of item models
|
||||||
|
|
||||||
|
@ -31,7 +31,7 @@ from calibre.utils.ipc.server import Server
|
|||||||
from calibre.gui2 import warning_dialog, choose_files, error_dialog, \
|
from calibre.gui2 import warning_dialog, choose_files, error_dialog, \
|
||||||
question_dialog,\
|
question_dialog,\
|
||||||
pixmap_to_data, choose_dir, \
|
pixmap_to_data, choose_dir, \
|
||||||
Dispatcher, \
|
Dispatcher, gprefs, \
|
||||||
available_height, \
|
available_height, \
|
||||||
max_available_height, config, info_dialog, \
|
max_available_height, config, info_dialog, \
|
||||||
available_width, GetMetadata
|
available_width, GetMetadata
|
||||||
@ -518,7 +518,21 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
|
|||||||
self.connect(self.library_view.model(), SIGNAL('count_changed(int)'),
|
self.connect(self.library_view.model(), SIGNAL('count_changed(int)'),
|
||||||
self.tags_view.recount)
|
self.tags_view.recount)
|
||||||
self.connect(self.search, SIGNAL('cleared()'), self.tags_view.clear)
|
self.connect(self.search, SIGNAL('cleared()'), self.tags_view.clear)
|
||||||
|
if not gprefs.get('quick_start_guide_added', False):
|
||||||
|
from calibre.ebooks.metadata import MetaInformation
|
||||||
|
mi = MetaInformation(_('Calibre Quick Start Guide'), ['John Schember'])
|
||||||
|
mi.author_sort = 'Schember, John'
|
||||||
|
mi.comments = "A guide to get you up an running with calibre"
|
||||||
|
mi.publisher = 'calibre'
|
||||||
|
self.library_view.model().add_books([P('quick_start.epub')], ['epub'],
|
||||||
|
[mi])
|
||||||
|
gprefs['quick_start_guide_added'] = True
|
||||||
|
self.library_view.model().books_added(1)
|
||||||
|
if hasattr(self, 'db_images'):
|
||||||
|
self.db_images.reset()
|
||||||
|
|
||||||
self.library_view.model().count_changed()
|
self.library_view.model().count_changed()
|
||||||
|
|
||||||
########################### Cover Flow ################################
|
########################### Cover Flow ################################
|
||||||
self.cover_flow = None
|
self.cover_flow = None
|
||||||
if CoverFlow is not None:
|
if CoverFlow is not None:
|
||||||
@ -1008,7 +1022,6 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
|
|||||||
return
|
return
|
||||||
self._add_books(books, to_device)
|
self._add_books(books, to_device)
|
||||||
|
|
||||||
|
|
||||||
def _add_books(self, paths, to_device, on_card=None):
|
def _add_books(self, paths, to_device, on_card=None):
|
||||||
if on_card is None:
|
if on_card is None:
|
||||||
on_card = 'carda' if self.stack.currentIndex() == 2 else 'cardb' if self.stack.currentIndex() == 3 else None
|
on_card = 'carda' if self.stack.currentIndex() == 2 else 'cardb' if self.stack.currentIndex() == 3 else None
|
||||||
|
@ -6,7 +6,7 @@ __docformat__ = 'restructuredtext en'
|
|||||||
'''
|
'''
|
||||||
Manage application-wide preferences.
|
Manage application-wide preferences.
|
||||||
'''
|
'''
|
||||||
import os, re, cPickle, textwrap, traceback, plistlib
|
import os, re, cPickle, textwrap, traceback, plistlib, json
|
||||||
from copy import deepcopy
|
from copy import deepcopy
|
||||||
from functools import partial
|
from functools import partial
|
||||||
from optparse import OptionParser as _OptionParser
|
from optparse import OptionParser as _OptionParser
|
||||||
@ -564,23 +564,31 @@ class XMLConfig(dict):
|
|||||||
data types.
|
data types.
|
||||||
'''
|
'''
|
||||||
|
|
||||||
|
EXTENSION = '.plist'
|
||||||
|
|
||||||
def __init__(self, rel_path_to_cf_file):
|
def __init__(self, rel_path_to_cf_file):
|
||||||
dict.__init__(self)
|
dict.__init__(self)
|
||||||
self.file_path = os.path.join(config_dir,
|
self.file_path = os.path.join(config_dir,
|
||||||
*(rel_path_to_cf_file.split('/')))
|
*(rel_path_to_cf_file.split('/')))
|
||||||
self.file_path = os.path.abspath(self.file_path)
|
self.file_path = os.path.abspath(self.file_path)
|
||||||
if not self.file_path.endswith('.plist'):
|
if not self.file_path.endswith(self.EXTENSION):
|
||||||
self.file_path += '.plist'
|
self.file_path += self.EXTENSION
|
||||||
|
|
||||||
self.refresh()
|
self.refresh()
|
||||||
|
|
||||||
|
def raw_to_object(self, raw):
|
||||||
|
return plistlib.readPlistFromString(raw)
|
||||||
|
|
||||||
|
def to_raw(self):
|
||||||
|
return plistlib.writePlistToString(self)
|
||||||
|
|
||||||
def refresh(self):
|
def refresh(self):
|
||||||
d = {}
|
d = {}
|
||||||
if os.path.exists(self.file_path):
|
if os.path.exists(self.file_path):
|
||||||
with ExclusiveFile(self.file_path) as f:
|
with ExclusiveFile(self.file_path) as f:
|
||||||
raw = f.read()
|
raw = f.read()
|
||||||
try:
|
try:
|
||||||
d = plistlib.readPlistFromString(raw) if raw.strip() else {}
|
d = self.raw_to_object(raw) if raw.strip() else {}
|
||||||
except SystemError:
|
except SystemError:
|
||||||
pass
|
pass
|
||||||
except:
|
except:
|
||||||
@ -618,11 +626,21 @@ class XMLConfig(dict):
|
|||||||
if not os.path.exists(dpath):
|
if not os.path.exists(dpath):
|
||||||
os.makedirs(dpath, mode=CONFIG_DIR_MODE)
|
os.makedirs(dpath, mode=CONFIG_DIR_MODE)
|
||||||
with ExclusiveFile(self.file_path) as f:
|
with ExclusiveFile(self.file_path) as f:
|
||||||
raw = plistlib.writePlistToString(self)
|
raw = self.to_raw()
|
||||||
f.seek(0)
|
f.seek(0)
|
||||||
f.truncate()
|
f.truncate()
|
||||||
f.write(raw)
|
f.write(raw)
|
||||||
|
|
||||||
|
class JSONConfig(XMLConfig):
    '''
    Variant of :class:`XMLConfig` that persists its contents as JSON.

    Only the file extension and the two serialization hooks are overridden;
    everything else is inherited from the base class.
    '''

    EXTENSION = '.json'

    def raw_to_object(self, raw):
        # The raw file content is decoded as UTF-8 before being parsed.
        text = raw.decode('utf-8')
        return json.loads(text)

    def to_raw(self):
        # Indented output keeps the stored file readable.
        serialized = json.dumps(self, indent=2)
        return serialized
|
||||||
|
|
||||||
|
|
||||||
def _prefs():
|
def _prefs():
|
||||||
c = Config('global', 'calibre wide preferences')
|
c = Config('global', 'calibre wide preferences')
|
||||||
|
Loading…
x
Reference in New Issue
Block a user