GwR initial release of Catalog features

This commit is contained in:
GRiker 2010-01-21 09:31:42 -07:00
commit 3024d37142
33 changed files with 1141 additions and 251 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 395 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.3 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.7 KiB

BIN
resources/quick_start.epub Normal file

Binary file not shown.

View File

@ -1,7 +1,5 @@
#!/usr/bin/env python
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
''' '''
spectator.org spectator.org
''' '''
@ -11,20 +9,22 @@ from calibre.web.feeds.news import BasicNewsRecipe
class TheAmericanSpectator(BasicNewsRecipe): class TheAmericanSpectator(BasicNewsRecipe):
title = 'The American Spectator' title = 'The American Spectator'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
language = 'en'
description = 'News from USA' description = 'News from USA'
category = 'news, politics, USA, world'
publisher = 'The American Spectator'
oldest_article = 7 oldest_article = 7
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
language = 'en'
INDEX = 'http://spectator.org' INDEX = 'http://spectator.org'
html2lrf_options = [ conversion_options = {
'--comment' , description 'comments' : description
, '--category' , 'news, politics, USA' ,'tags' : category
, '--publisher' , title ,'language' : language
] ,'publisher' : publisher
}
keep_only_tags = [ keep_only_tags = [
dict(name='div', attrs={'class':'post inner'}) dict(name='div', attrs={'class':'post inner'})
@ -33,13 +33,11 @@ class TheAmericanSpectator(BasicNewsRecipe):
remove_tags = [ remove_tags = [
dict(name='object') dict(name='object')
,dict(name='div', attrs={'class':'col3' }) ,dict(name='div', attrs={'class':['col3','post-options','social']})
,dict(name='div', attrs={'class':'post-options' }) ,dict(name='p' , attrs={'class':['letter-editor','meta']})
,dict(name='p' , attrs={'class':'letter-editor'})
,dict(name='div', attrs={'class':'social' })
] ]
feeds = [ (u'Articles', u'http://feedproxy.google.com/amspecarticles')] feeds = [ (u'Articles', u'http://feeds.feedburner.com/amspecarticles')]
def get_cover_url(self): def get_cover_url(self):
cover_url = None cover_url = None
@ -53,3 +51,7 @@ class TheAmericanSpectator(BasicNewsRecipe):
def print_version(self, url): def print_version(self, url):
return url + '/print' return url + '/print'
def get_article_url(self, article):
return article.get('guid', None)

View File

@ -0,0 +1,41 @@
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
class drivelrycom(BasicNewsRecipe):
    '''
    Recipe for drivelry.com, a blog by Mike Abrahams.

    Articles come from the FeedBurner feed. Each downloaded page is reduced
    to its ``<div id="main">`` element, placed in a minimal document that
    also carries a donation link.
    '''

    title = u'drivelry.com'
    language = 'en'
    description = 'A blog by Mike Abrahams'
    __author__ = 'Krittika Goyal'
    oldest_article = 60  # days
    max_articles_per_feed = 25

    # The attribute BasicNewsRecipe honours is ``no_stylesheets``; the
    # previously used ``remove_stylesheets`` was silently ignored.
    no_stylesheets = True

    remove_tags_after = dict(name='div', attrs={'id':'bookmark'})
    remove_tags = [
        dict(name='iframe'),
        dict(name='div', attrs={'class':['sidebar']}),
        dict(name='div', attrs={'id':['bookmark']}),
    ]

    feeds = [
        ('drivelry.com',
         'http://feeds.feedburner.com/drivelry'),
    ]

    def preprocess_html(self, soup):
        '''
        Return a minimal document containing only the article body.

        :param soup: parsed page as handed over by the recipe framework.
        :return: a new soup holding ``div#main`` followed by the donation
                 paragraph, or ``soup`` unchanged when the page has no
                 ``div#main`` element.
        '''
        story = soup.find(name='div', attrs={'id':'main'})
        if story is None:
            # Unexpected page layout: inserting None into the new body would
            # raise, so keep the page as downloaded instead.
            return soup

        fresh_soup = BeautifulSoup('''
<html><head><title>t</title></head><body>
<p>To donate to this blog: <a href="http://www.drivelry.com/thank-you/">click here</a></p>
</body></html>
''')
        body = fresh_soup.find(name='body')
        body.insert(0, story)
        return fresh_soup

View File

@ -1,23 +1,29 @@
#!/usr/bin/python
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class FokkeEnSukkeRecipe(BasicNewsRecipe) : class FokkeEnSukkeRecipe(BasicNewsRecipe) :
__license__ = 'GPL v3' __license__ = 'GPL v3'
__author__ = 'kwetal' __author__ = 'kwetal'
language = 'nl' language = 'nl'
description = u'Popular Dutch daily cartoon Fokke en Sukke' country = 'NL'
version = 2
title = u'Fokke en Sukke' title = u'Fokke en Sukke'
no_stylesheets = True publisher = u'Reid, Geleijnse & Van Tol'
# For reasons unknown to me the extra css is, on the cartoon pages, inserted in the <body> and not in the <head>. My reader (Sony PRS-600) has a serious issue category = u'News, Cartoons'
# with that: it treats it as content and displays it as is. Setting this property to empty solves this for me. description = u'Popular Dutch daily cartoon Fokke en Sukke'
template_css = ''
INDEX = u'http://foksuk.nl'
# This cover is not as nice as it could be, needs some work conversion_options = {'comments': description, 'language': language, 'publisher': publisher}
#cover_url = 'http://foksuk.nl/content/wysiwyg/simpleimages/image350.gif'
no_stylesheets = True
extra_css = '''
body{font-family: verdana, arial, helvetica, geneva, sans-serif ; margin: 0em; padding: 0em;}
div.title {text-align: center; margin-bottom: 1em;}
'''
INDEX = u'http://foksuk.nl'
cover_url = 'http://foksuk.nl/content/wysiwyg/simpleimages/image350.gif'
keep_only_tags = [dict(name='div', attrs={'class' : 'cartoon'})] keep_only_tags = [dict(name='div', attrs={'class' : 'cartoon'})]
@ -31,15 +37,14 @@ class FokkeEnSukkeRecipe(BasicNewsRecipe) :
links = index.findAll('a') links = index.findAll('a')
maxIndex = len(links) - 1 maxIndex = len(links) - 1
articles = [] articles = []
for i in range(len(links)) : for i in range(1, len(links)) :
# The first link does not interest us, as it points to no cartoon. A begin_at parameter in the range() function would be nice. # There can be more than one cartoon for a given day (currently either one or two).
if i == 0 : # If there's only one, there is just a link with the dayname.
continue # If there are two, there are three links in sequence: <a>dayname</a> <a>1</a> <a>2</a>.
# In that case we're interested in the last two.
# There can be more than one cartoon for a given day (currently either one or two). If there's only one, there is just a link with the dayname.
# If there are two, there are three links in sequence: <a>dayname</a> <a>1</a> <a>2</a>. In that case we're interested in the last two.
if links[i].renderContents() in dayNames : if links[i].renderContents() in dayNames :
# If the link is not in daynames, we processed it already, but if it is, let's see if the next one has '1' as content # If the link is not in daynames, we processed it already, but if it is, let's see
# if the next one has '1' as content
if (i + 1 <= maxIndex) and (links[i + 1].renderContents() == '1') : if (i + 1 <= maxIndex) and (links[i + 1].renderContents() == '1') :
# Got you! Add it to the list # Got you! Add it to the list
article = {'title' : links[i].renderContents() + ' 1', 'date' : u'', 'url' : self.INDEX + links[i + 1]['href'], 'description' : ''} article = {'title' : links[i].renderContents() + ' 1', 'date' : u'', 'url' : self.INDEX + links[i + 1]['href'], 'description' : ''}
@ -59,29 +64,31 @@ class FokkeEnSukkeRecipe(BasicNewsRecipe) :
return [[week, articles]] return [[week, articles]]
def preprocess_html(self, soup) : def preprocess_html(self, soup) :
# This method is called for every page, be it cartoon or TOC. We need to process each in their own way
cartoon = soup.find('div', attrs={'class' : 'cartoon'}) cartoon = soup.find('div', attrs={'class' : 'cartoon'})
if cartoon :
# It is a cartoon. Extract the title.
title = ''
img = soup.find('img', attrs = {'alt' : True})
if img :
title = img['alt']
# Using the 'extra_css' displays it in the <body> and not in the <head>. See comment at the top of this class. Setting the style this way solves that. title = ''
tag = Tag(soup, 'div', [('style', 'text-align: center; margin-bottom: 8px')]) img = soup.find('img', attrs = {'alt' : True})
tag.insert(0, title) if img :
cartoon.insert(0, tag) title = img['alt']
# I have not quite worked out why, but we have to throw out this part of the page. It contains the very same index we processed earlier, tag = Tag(soup, 'div', [('class', 'title')])
# and Calibre does not like that too much. As far as I can tell it goes into recursion and the result is an empty eBook. tag.insert(0, title)
select = cartoon.find('div', attrs={'class' : 'selectcartoon'}) cartoon.insert(0, tag)
if select :
select.extract()
return cartoon # We only want the cartoon, so throw out the index
else : select = cartoon.find('div', attrs={'class' : 'selectcartoon'})
# It is a TOC. Just return the whole lot. if select :
return soup select.extract()
freshSoup = self.getFreshSoup(soup)
freshSoup.body.append(cartoon)
return freshSoup
def getFreshSoup(self, oldSoup):
freshSoup = BeautifulSoup('<html><head><title></title></head><body></body></html>')
if oldSoup.head.title:
freshSoup.head.title.append(self.tag_to_string(oldSoup.head.title))
return freshSoup

View File

@ -15,7 +15,7 @@ class FTDe(BasicNewsRecipe):
__author__ = 'Oliver Niesner' __author__ = 'Oliver Niesner'
use_embedded_content = False use_embedded_content = False
timefmt = ' [%d %b %Y]' timefmt = ' [%d %b %Y]'
language = 'de' language = _('German')
max_articles_per_feed = 40 max_articles_per_feed = 40
no_stylesheets = True no_stylesheets = True
@ -28,8 +28,13 @@ class FTDe(BasicNewsRecipe):
dict(id='ADS_Top'), dict(id='ADS_Top'),
dict(id='spinner'), dict(id='spinner'),
dict(id='ftd-contentad'), dict(id='ftd-contentad'),
dict(id='ftd-promo'),
dict(id='nava-50009007-1-0'), dict(id='nava-50009007-1-0'),
dict(id='navli-50009007-1-0'), dict(id='navli-50009007-1-0'),
dict(id='Box5000534-0-0-0'),
dict(id='ExpV-1-0-0-1'),
dict(id='ExpV-1-0-0-0'),
dict(id='PollExpV-2-0-0-0'),
dict(id='starRating'), dict(id='starRating'),
dict(id='saveRating'), dict(id='saveRating'),
dict(id='yLayer'), dict(id='yLayer'),
@ -44,14 +49,19 @@ class FTDe(BasicNewsRecipe):
dict(name='ul', attrs={'class':'nav'}), dict(name='ul', attrs={'class':'nav'}),
dict(name='p', attrs={'class':'articleOptionHead'}), dict(name='p', attrs={'class':'articleOptionHead'}),
dict(name='p', attrs={'class':'articleOptionFoot'}), dict(name='p', attrs={'class':'articleOptionFoot'}),
dict(name='p', attrs={'class':'moreInfo'}),
dict(name='div', attrs={'class':'chartBox'}), dict(name='div', attrs={'class':'chartBox'}),
dict(name='div', attrs={'class':'ratingOpt starRatingContainer articleOptionFootFrame'}), dict(name='div', attrs={'class':'ratingOpt starRatingContainer articleOptionFootFrame'}),
dict(name='div', attrs={'class':'box boxArticleBasic boxComments boxTransparent'}), dict(name='div', attrs={'class':'box boxArticleBasic boxComments boxTransparent'}),
dict(name='div', attrs={'class':'box boxNavTabs '}), dict(name='div', attrs={'class':'box boxNavTabs'}),
dict(name='div', attrs={'class':'boxMMRgtLow'}),
dict(name='span', attrs={'class':'vote_455857'}), dict(name='span', attrs={'class':'vote_455857'}),
dict(name='div', attrs={'class':'relatedhalb'}), dict(name='div', attrs={'class':'relatedhalb'}),
dict(name='div', attrs={'class':'box boxListScrollOutline'}), dict(name='div', attrs={'class':'box boxListScrollOutline'}),
dict(name='div', attrs={'class':'box boxPhotoshow boxImgWide'}),
dict(name='div', attrs={'class':'box boxTeaser'}),
dict(name='div', attrs={'class':'tagCloud'}), dict(name='div', attrs={'class':'tagCloud'}),
dict(name='div', attrs={'class':'pollView'}),
dict(name='div', attrs={'class':'box boxArticleBasic boxNavTabsOutline'}), dict(name='div', attrs={'class':'box boxArticleBasic boxNavTabsOutline'}),
dict(name='div', attrs={'class':'ftdHpNav'}), dict(name='div', attrs={'class':'ftdHpNav'}),
dict(name='div', attrs={'class':'ftdHead'}), dict(name='div', attrs={'class':'ftdHead'}),
@ -67,9 +77,10 @@ class FTDe(BasicNewsRecipe):
dict(name='div', attrs={'class':'wertungoben'}), dict(name='div', attrs={'class':'wertungoben'}),
dict(name='div', attrs={'class':'artikelfuss'}), dict(name='div', attrs={'class':'artikelfuss'}),
dict(name='a', attrs={'class':'rating'}), dict(name='a', attrs={'class':'rating'}),
dict(name='a', attrs={'href':'#rt'}),
dict(name='div', attrs={'class':'articleOptionFootFrame'}), dict(name='div', attrs={'class':'articleOptionFootFrame'}),
dict(name='div', attrs={'class':'artikelsplitfaq'})] dict(name='div', attrs={'class':'artikelsplitfaq'})]
remove_tags_after = [dict(name='a', attrs={'class':'more'})] #remove_tags_after = [dict(name='a', attrs={'class':'more'})]
feeds = [ ('Finanzen', 'http://www.ftd.de/rss2/finanzen/maerkte'), feeds = [ ('Finanzen', 'http://www.ftd.de/rss2/finanzen/maerkte'),
('Meinungshungrige', 'http://www.ftd.de/rss2/meinungshungrige'), ('Meinungshungrige', 'http://www.ftd.de/rss2/meinungshungrige'),
@ -86,4 +97,4 @@ class FTDe(BasicNewsRecipe):
def print_version(self, url): def print_version(self, url):
return url + '?mode=print' return url.replace('.html', '.html?mode=print')

View File

@ -0,0 +1,38 @@
import urllib, re, mechanize
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre import __appname__
class GoogleReaderUber(BasicNewsRecipe):
    '''
    Download every unread item from a Google Reader account.

    Requires the user's Google credentials (``needs_subscription``). Feeds
    are discovered from the account's tag list and fetched through the
    Reader Atom endpoint with already-read items excluded.
    '''

    title = 'Google Reader Uber'
    description = 'This recipe downloads all unread feeds from your Google Reader account.'
    needs_subscription = True
    __author__ = 'rollercoaster, davec'
    base_url = 'http://www.google.com/reader/atom/'
    oldest_article = 365
    max_articles_per_feed = 250
    # n = number of items per feed, xt = exclude items carrying the "read" state.
    get_options = '?n=%d&xt=user/-/state/com.google/read' % max_articles_per_feed
    use_embedded_content = True

    def get_browser(self):
        '''
        Return a browser that carries the Google ``SID`` session cookie.

        Without credentials the stock recipe browser is returned. With
        credentials, a ClientLogin request is made and a fresh mechanize
        opener holding the resulting cookie replaces the stock browser.

        :raises ValueError: if the login response contains no ``SID`` token.
        '''
        br = BasicNewsRecipe.get_browser()
        if self.username is not None and self.password is not None:
            request = urllib.urlencode([('Email', self.username), ('Passwd', self.password),
                                        ('service', 'reader'), ('source', __appname__)])
            response = br.open('https://www.google.com/accounts/ClientLogin', request)
            match = re.search(r'SID=(\S*)', response.read())
            if match is None:
                # Fail with a meaningful message instead of an AttributeError
                # on ``None.group``.
                raise ValueError('Google ClientLogin response did not contain an SID token')
            sid = match.group(1)

            cookies = mechanize.CookieJar()
            br = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookies))
            # Positional fields: version, name, value, port, port_specified,
            # domain, domain_specified, domain_initial_dot, path,
            # path_specified, secure, expires, discard, comment, comment_url,
            # rest.
            cookies.set_cookie(mechanize.Cookie(None, 'SID', sid, None, False, '.google.com', True, True, '/', True, False, None, True, '', '', None))
        return br

    def get_feeds(self):
        '''
        Build the feed list from the account's Reader tag list.

        :return: list of ``(title, url)`` tuples; the title is the last path
                 component of the tag id.
        '''
        feeds = []
        soup = self.index_to_soup('http://www.google.com/reader/api/0/tag/list')
        for id_tag in soup.findAll(True, attrs={'name':['id']}):
            # The "broadcast" (shared items) tag is swapped for the full
            # reading list.
            url = id_tag.contents[0].replace('broadcast','reading-list')
            # rpartition yields the text after the last '/', and the whole
            # string when there is no '/', so no unchecked regex match is
            # needed.
            feed_title = url.rpartition('/')[2]
            feeds.append((feed_title,
                          self.base_url + urllib.quote(url.encode('utf-8')) + self.get_options))
        return feeds

View File

@ -0,0 +1,91 @@
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
import re
class JoopRecipe(BasicNewsRecipe):
    '''
    Recipe for Joop (joop.nl), a Dutch political blog published by Vara.

    The article index is scraped from the section link lists in the front
    page footer; article pages are then tidied in ``preprocess_html``.
    '''

    __license__ = 'GPL v3'
    __author__ = 'kwetal'
    language = 'nl'
    country = 'NL'
    version = 1

    title = u'Joop'
    publisher = u'Vara'
    category = u'News, Politics, Discussion'
    description = u'Political blog from the Netherlands'

    oldest_article = 7
    max_articles_per_feed = 100
    use_embedded_content = False

    no_stylesheets = True
    remove_javascript = True

    keep_only_tags = [
        dict(name = 'div', attrs = {'class': 'author_head clearfix photo'}),
        dict(name = 'h2', attrs = {'class': 'columnhead smallline'}),
        dict(name = 'div', attrs = {'class': re.compile('article.*')}),
    ]

    extra_css = '''
        body {font-family: verdana, arial, helvetica, geneva, sans-serif;}
        img {margin-right: 0.4em;}
        h3 {font-size: medium; font-style: italic; font-weight: normal;}
        h2 {font-size: xx-large; font-weight: bold}
        sub {color: #666666; font-size: x-small; font-weight: normal;}
        div.joop_byline {font-size: large}
        div.joop_byline_job {font-size: small; color: #696969;}
        div.joop_date {font-size: x-small; font-style: italic; margin-top: 0.6em}
        '''

    INDEX = 'http://www.joop.nl'

    conversion_options = {'comments': description, 'tags': category, 'language': language,
                          'publisher': publisher}

    def parse_index(self):
        '''
        Build the article index from the footer of the front page.

        :return: list of ``(section, articles)`` tuples. Sections for which
                 no articles were found are left out; an empty list is
                 returned when the footer is missing.
        '''
        sections = ['Politiek', 'Wereld', 'Economie', 'Groen', 'Media', 'Leven', 'Show', 'Opinies']
        soup = self.index_to_soup(self.INDEX)
        answer = []

        footer = soup.find('div', attrs = {'id': 'footer'})
        if footer is None:
            # Layout changed: nothing to index, and footer.find would raise.
            return answer

        for section in sections:
            articles = []
            # ``section=section`` binds the current value at definition time.
            heading = footer.find(lambda tag, section=section:
                                  tag.name == 'h2' and tag.renderContents() == section)
            if heading:
                ul = heading.findNextSibling('ul', 'linklist')
                if ul:
                    for li in ul.findAll('li'):
                        if li.a is None:
                            # List item without a link: nothing to fetch.
                            continue
                        title = self.tag_to_string(li.a)
                        url = self.INDEX + li.a['href']
                        articles.append({'title': title, 'date': None, 'url': url, 'description': ''})

            if articles:
                answer.append((section, articles))

        return answer

    def preprocess_html(self, soup):
        '''
        Turn the author header and the date heading into styled ``div``s.

        :param soup: parsed article page.
        :return: the same soup, modified in place.
        '''
        author_head = soup.find('div', 'author_head clearfix photo')
        if author_head:
            h2 = soup.find('h2')
            if h2:
                # Demote the heading to a byline div (styled via extra_css).
                h2.name = 'div'
                h2['class'] = 'joop_byline'
                span = h2.find('span')
                if span:
                    span.name = 'div'
                    span['class'] = 'joop_byline_job'
                author_head.replaceWith(h2)

        h2 = soup.find('h2', attrs = {'class': 'columnhead smallline'})
        if h2:
            txt = None
            span = h2.find('span', 'info')
            if span:
                txt = span.find(text = True)
            date_div = Tag(soup, 'div', attrs = [('class', 'joop_date')])
            if txt is not None:
                # Appending None to a Tag raises; leave the div empty when
                # the page carries no date text.
                date_div.append(txt)
            h2.replaceWith(date_div)

        return soup

View File

@ -0,0 +1,44 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
www.kitsapsun.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Kitsapsun(BasicNewsRecipe):
    '''
    Recipe for the Kitsap Sun (kitsapsun.com), news from Kitsap County.

    Articles are taken from the per-section headline feeds and downloaded
    in their print layout.
    '''

    title = 'Kitsap Sun'
    __author__ = 'Darko Miletic'
    description = 'News from Kitsap County'
    publisher = 'Scripps Interactive Newspapers Group'
    category = 'news, Kitsap county, USA'
    language = 'en'
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'cp1252'
    use_embedded_content = False

    # Metadata forwarded to the conversion pipeline.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    keep_only_tags = [
        dict(name='div', attrs={'id': ['story_meta', 'story_content']}),
    ]

    remove_tags = [
        dict(name=['object', 'link', 'embed', 'form', 'iframe']),
    ]

    feeds = [
        (u'News', u'http://www.kitsapsun.com/rss/headlines/news/'),
        (u'Business', u'http://www.kitsapsun.com/rss/headlines/business/'),
        (u'Communities', u'http://www.kitsapsun.com/rss/headlines/communities/'),
        (u'Entertainment', u'http://www.kitsapsun.com/rss/headlines/entertainment/'),
        (u'Lifestyles', u'http://www.kitsapsun.com/rss/headlines/lifestyles/'),
    ]

    def print_version(self, url):
        '''
        Map an article URL to its print view.

        Everything after the last ``/`` is dropped and ``/?print=1`` is
        appended to what remains.
        '''
        parent, _sep, _tail = url.rpartition('/')
        return parent + '/?print=1'

View File

@ -0,0 +1,79 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'Lorenzo Vigentini'
__copyright__ = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
__version__ = 'v1.01'
__date__ = '14, January 2010'
__description__ = 'Canadian Paper '
'''
http://www.ledevoir.com/
'''
from calibre.web.feeds.news import BasicNewsRecipe
class ledevoir(BasicNewsRecipe):
    '''
    Recipe for Le Devoir (ledevoir.com), a French-language Canadian paper.

    Purely declarative: feeds, tag filters and styling only.
    '''

    # calibre reads the recipe author from ``__author__``; the plain
    # ``author`` attribute is kept so existing readers of it still work.
    __author__ = 'Lorenzo Vigentini'
    author = 'Lorenzo Vigentini'
    description = 'Canadian Paper'

    cover_url = 'http://www.ledevoir.com/images/ul/graphiques/logo_devoir.gif'
    title = u'Le Devoir'
    publisher = 'leDevoir.com'
    category = 'News, finance, economy, politics'

    language = 'fr'
    encoding = 'utf-8'
    timefmt = '[%a, %d %b, %Y]'

    max_articles_per_feed = 50
    use_embedded_content = False
    # NOTE(review): the documented BasicNewsRecipe attribute appears to be
    # ``recursions``; this ``recursion`` value is presumably ignored.
    # Left unchanged on purpose -- confirm before renaming, since following
    # links 10 levels deep would be a large behaviour change.
    recursion = 10

    remove_javascript = True
    no_stylesheets = True

    keep_only_tags = [
        dict(name='div', attrs={'id':'article'}),
        dict(name='ul', attrs={'id':'ariane'})
    ]

    remove_tags = [
        dict(name='div', attrs={'id':'dialog'}),
        dict(name='div', attrs={'class':['interesse_actions','reactions']}),
        dict(name='ul', attrs={'class':'mots_cles'}),
        dict(name='a', attrs={'class':'haut'}),
        dict(name='h5', attrs={'class':'interesse_actions'})
    ]

    feeds = [
        (u'A la une', 'http://www.ledevoir.com/rss/manchettes.xml'),
        (u'Edition complete', 'http://feeds2.feedburner.com/fluxdudevoir'),
        (u'Opinions', 'http://www.ledevoir.com/rss/opinions.xml'),
        (u'Chroniques', 'http://www.ledevoir.com/rss/chroniques.xml'),
        (u'Politique', 'http://www.ledevoir.com/rss/section/politique.xml?id=51'),
        (u'International', 'http://www.ledevoir.com/rss/section/international.xml?id=76'),
        (u'Culture', 'http://www.ledevoir.com/rss/section/culture.xml?id=48'),
        (u'Environnement', 'http://www.ledevoir.com/rss/section/environnement.xml?id=78'),
        (u'Societe', 'http://www.ledevoir.com/rss/section/societe.xml?id=52'),
        (u'Economie', 'http://www.ledevoir.com/rss/section/economie.xml?id=49'),
        (u'Sports', 'http://www.ledevoir.com/rss/section/sports.xml?id=85'),
        (u'Loisirs', 'http://www.ledevoir.com/rss/section/loisirs.xml?id=50')
    ]

    extra_css = '''
        h1 {color:#1C1E7C;font-family:Times,Georgia,serif;font-size:1.85em;font-size-adjust:none;font-stretch:normal;font-style:normal;font-variant:normal;font-weight:bold;line-height:1.2em;margin:0 0 5px;}
        h2 {color:#333333;font-family:Times,Georgia,serif;font-size:1.5em;font-size-adjust:none;font-stretch:normal;font-style:normal;font-variant:normal;font-weight:normal;line-height:1.2em;margin:0 0 5px;}
        h3 {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:15px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px;}
        h4 {color:#333333; font-family:Arial,Helvetica,sans-serif;font-size:13px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; }
        h5 {color:#333333; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; text-transform:uppercase;}
        .specs {line-height:1em;margin:1px 0;}
        .specs span.auteur {font:0.85em/1.1em Arial, Verdana, sans-serif;color:#787878;}
        .specs span.auteur a,
        .specs span.auteur span {text-transform:uppercase;color:#787878;}
        .specs .date {font:0.85em/1.1em Arial, Verdana, sans-serif;color:#787878;}
        ul#ariane {list-style-type:none;margin:0;padding:5px 0 8px 0;font:0.85em/1.2em Arial, Verdana, sans-serif;color:#2E2E2E;border-bottom:10px solid #fff;}
        ul#ariane li {display:inline;}
        ul#ariane a {color:#2E2E2E;text-decoration:underline;}
        .credit {color:#787878;font-size:0.71em;line-height:1.1em;font-weight:bold;}
        .texte {font-size:1.15em;line-height:1.4em;margin-bottom:17px;}
        '''

View File

@ -70,11 +70,28 @@ class NYTimes(BasicNewsRecipe):
feeds.append((current_section, current_articles)) feeds.append((current_section, current_articles))
return feeds return feeds
def preprocess_html(self, soup): def preprocess_html(self, soup):
story = soup.find(name='div', attrs={'class':'triline'}) story = soup.find(name='div', attrs={'class':'triline'})
#td = heading.findParent(name='td') page2_link = soup.find('p','pagenav')
#td.extract() if page2_link:
atag = page2_link.find('a',href=True)
if atag:
page2_url = atag['href']
if page2_url.startswith('story'):
page2_url = 'http://www.nationalpost.com/todays-paper/'+page2_url
elif page2_url.startswith( '/todays-paper/story.html'):
page2_url = 'http://www.nationalpost.com/'+page2_url
page2_soup = self.index_to_soup(page2_url)
if page2_soup:
page2_content = page2_soup.find('div','story-content')
if page2_content:
full_story = BeautifulSoup('<div></div>')
full_story.insert(0,story)
full_story.insert(1,page2_content)
story = full_story
soup = BeautifulSoup('<html><head><title>t</title></head><body></body></html>') soup = BeautifulSoup('<html><head><title>t</title></head><body></body></html>')
body = soup.find(name='body') body = soup.find(name='body')
body.insert(0, story) body.insert(0, story)
return soup return soup

View File

@ -1,29 +1,38 @@
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class NrcNextRecipe(BasicNewsRecipe): class NrcNextRecipe(BasicNewsRecipe):
__license__ = 'GPL v3' __license__ = 'GPL v3'
__author__ = 'kwetal' __author__ = 'kwetal'
version = 1
language = 'nl' language = 'nl'
country = 'NL'
version = 2
title = u'nrcnext'
publisher = u'NRC Media'
category = u'News, Opinion, the Netherlands'
description = u'Dutch newsblog from the Dutch daily newspaper nrcnext.' description = u'Dutch newsblog from the Dutch daily newspaper nrcnext.'
title = u'nrcnext'
conversion_options = {'comments': description, 'language': language, 'publisher': publisher}
no_stylesheets = True no_stylesheets = True
template_css = '' remove_javascript = True
# I want to do some special processing on the articles. I could not solve it with the 'extra_css' property . So we do it the hard way.
keep_only_tags = [dict(name='div', attrs={'id' : 'main'})] keep_only_tags = [dict(name='div', attrs={'id' : 'main'})]
# If that's overkill for you comment out the previous line and uncomment the next. Then get rid of the preprocess_html() method.
#keep_only_tags = [dict(name='div', attrs={'class' : 'post'}), dict(name='div', attrs={'class' : 'vlag'}) ]
remove_tags = [dict(name = 'div', attrs = {'class' : 'meta'}), remove_tags = []
dict(name = 'div', attrs = {'class' : 'datumlabel'}), remove_tags.append(dict(name = 'div', attrs = {'class' : 'meta'}))
dict(name = 'ul', attrs = {'class' : 'cats single'}), remove_tags.append(dict(name = 'div', attrs = {'class' : 'datumlabel'}))
dict(name = 'ul', attrs = {'class' : 'cats onderwerpen'}), remove_tags.append(dict(name = 'ul', attrs = {'class' : 'cats single'}))
dict(name = 'ul', attrs = {'class' : 'cats rubrieken'})] remove_tags.append(dict(name = 'ul', attrs = {'class' : 'cats onderwerpen'}))
remove_tags.append(dict(name = 'ul', attrs = {'class' : 'cats rubrieken'}))
use_embedded_content = False extra_css = '''
body {font-family: verdana, arial, helvetica, geneva, sans-serif; text-align: left;}
p.wp-caption-text {font-size: x-small; color: #666666;}
h2.sub_title {font-size: medium; color: #696969;}
h2.vlag {font-size: small; font-weight: bold;}
'''
def parse_index(self) : def parse_index(self) :
# Use the wesbite as an index. Their RSS feeds can be out of date. # Use the wesbite as an index. Their RSS feeds can be out of date.
@ -44,10 +53,11 @@ class NrcNextRecipe(BasicNewsRecipe):
# Find the links to the actual articles and rember the location they're pointing to and the title # Find the links to the actual articles and rember the location they're pointing to and the title
a = post.find('a', attrs={'rel' : 'bookmark'}) a = post.find('a', attrs={'rel' : 'bookmark'})
href = a['href'] href = a['href']
title = a.renderContents() title = self.tag_to_string(a)
if index == 'columnisten' : if index == 'columnisten' :
# In this feed/page articles can be written by more than one author. It is nice to see their names in the titles. # In this feed/page articles can be written by more than one author.
# It is nice to see their names in the titles.
flag = post.find('h2', attrs = {'class' : 'vlag'}) flag = post.find('h2', attrs = {'class' : 'vlag'})
author = flag.contents[0].renderContents() author = flag.contents[0].renderContents()
completeTitle = u''.join([author, u': ', title]) completeTitle = u''.join([author, u': ', title])
@ -71,44 +81,46 @@ class NrcNextRecipe(BasicNewsRecipe):
return answer return answer
def preprocess_html(self, soup) : def preprocess_html(self, soup) :
# This method is called for every page, be it cartoon or TOC. We need to process each in their own way if soup.find('div', attrs = {'id' : 'main', 'class' : 'single'}):
if soup.find('div', attrs = {'id' : 'main', 'class' : 'single'}) :
# It's an article, find the interesting part
tag = soup.find('div', attrs = {'class' : 'post'}) tag = soup.find('div', attrs = {'class' : 'post'})
if tag : if tag:
# And replace any links with their text, so they don't show up underlined on my reader. h2 = tag.find('h2', 'vlag')
for link in tag.findAll('a') : if h2:
link.replaceWith(link.renderContents()) new_h2 = Tag(soup, 'h2', attrs = [('class', 'vlag')])
new_h2.append(self.tag_to_string(h2))
h2.replaceWith(new_h2)
else:
h2 = tag.find('h2')
if h2:
new_h2 = Tag(soup, 'h2', attrs = [('class', 'sub_title')])
new_h2.append(self.tag_to_string(h2))
h2.replaceWith(new_h2)
# Slows down my Sony reader; feel free to comment out h1 = tag.find('h1')
for movie in tag.findAll('span', attrs = {'class' : 'vvqbox vvqvimeo'}) : if h1:
new_h1 = Tag(soup, 'h1')
new_h1.append(self.tag_to_string(h1))
h1.replaceWith(new_h1)
# Slows down my reader.
for movie in tag.findAll('span', attrs = {'class' : 'vvqbox vvqvimeo'}):
movie.extract() movie.extract()
for movie in tag.findAll('span', attrs = {'class' : 'vvqbox vvqyoutube'}) : for movie in tag.findAll('span', attrs = {'class' : 'vvqbox vvqyoutube'}):
movie.extract() movie.extract()
for iframe in tag.findAll('iframe') :
iframe.extract()
homeMadeSoup = BeautifulSoup('<html><head></head><body></body></html>') fresh_soup = self.getFreshSoup(soup)
body = homeMadeSoup.find('body') fresh_soup.body.append(tag)
body.append(tag)
return homeMadeSoup return fresh_soup
else : else:
# This should never happen and other famous last words... # This should never happen and other famous last words...
return soup return soup
else :
# It's a TOC, return the whole lot.
return soup
def postproces_html(self, soup) :
# Should not happen, but it does. Slows down my Sony eReader
for img in soup.findAll('img') :
if img['src'].startswith('http://') :
img.extract()
# Happens for some movies which we are not able to view anyway
for iframe in soup.findAll('iframe') :
if iframe['src'].startswith('http://') :
iframe.extract()
def getFreshSoup(self, oldSoup):
freshSoup = BeautifulSoup('<html><head><title></title></head><body></body></html>')
if oldSoup.head.title:
freshSoup.head.title.append(self.tag_to_string(oldSoup.head.title))
return freshSoup

View File

@ -0,0 +1,125 @@
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class YemenTimesRecipe(BasicNewsRecipe):
__license__ = 'GPL v3'
__author__ = 'kwetal'
language = 'en_YE'
country = 'YE'
version = 1
title = u'Yemen Times'
publisher = u'yementimes.com'
category = u'News, Opinion, Yemen'
description = u'Award winning weekly from Yemen, promoting press freedom, professional journalism and the defense of human rights.'
oldest_article = 7
max_articles_per_feed = 100
use_embedded_content = False
encoding = 'utf-8'
remove_empty_feeds = True
no_stylesheets = True
remove_javascript = True
keep_only_tags = []
keep_only_tags.append(dict(name = 'div', attrs = {'id': 'ctl00_ContentPlaceHolder1_MAINNEWS0_Panel1',
'class': 'DMAIN2'}))
remove_attributes = ['style']
INDEX = 'http://www.yementimes.com/'
feeds = []
feeds.append((u'Our Viewpoint', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=6&pnm=OUR%20VIEWPOINT'))
feeds.append((u'Local News', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=3&pnm=Local%20news'))
feeds.append((u'Their News', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=80&pnm=Their%20News'))
feeds.append((u'Report', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=8&pnm=report'))
feeds.append((u'Health', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=51&pnm=health'))
feeds.append((u'Interview', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=77&pnm=interview'))
feeds.append((u'Opinion', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=7&pnm=opinion'))
feeds.append((u'Business', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=5&pnm=business'))
feeds.append((u'Op-Ed', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=81&pnm=Op-Ed'))
feeds.append((u'Culture', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=75&pnm=Culture'))
feeds.append((u'Readers View', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=4&pnm=Readers%20View'))
feeds.append((u'Variety', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=9&pnm=Variety'))
feeds.append((u'Education', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=57&pnm=Education'))
extra_css = '''
body {font-family:verdana, arial, helvetica, geneva, sans-serif;}
div.yemen_byline {font-size: medium; font-weight: bold;}
div.yemen_date {font-size: small; color: #666666; margin-bottom: 0.6em;}
.yemen_caption {font-size: x-small; font-style: italic; color: #696969;}
'''
conversion_options = {'comments': description, 'tags': category, 'language': 'en',
'publisher': publisher, 'linearize_tables': True}
def get_browser(self):
    """Return the browser from ``BasicNewsRecipe.get_browser()`` after
    calling ``set_handle_gzip(True)`` on it."""
    browser = BasicNewsRecipe.get_browser()
    browser.set_handle_gzip(True)
    return browser
def parse_index(self):
    """Build the list of ``(section title, articles)`` tuples.

    Every page in ``self.feeds`` is fetched with ``index_to_soup``. The
    article links are read from the ``<li>`` items of the first
    ``<table>`` sibling that follows the div matched by
    ``soup.find('div', 'newsbox')``.

    A section whose page lacks that div or table is skipped, and list
    items without a usable link are ignored, so one malformed page no
    longer aborts the whole index with an ``AttributeError``.
    """
    answer = []
    for feed_title, feed in self.feeds:
        soup = self.index_to_soup(feed)
        newsbox = soup.find('div', 'newsbox')
        main = None
        if newsbox is not None:
            main = newsbox.findNextSibling('table')
        if main is None:
            # Page layout not recognised: nothing to list for this section.
            continue
        articles = []
        for li in main.findAll('li'):
            link = li.a
            if link is None or not link.get('href'):
                continue
            articles.append({'title': self.tag_to_string(link),
                             'date': None,
                             'url': self.INDEX + link['href'],
                             'description': '<br/>&nbsp;'})
        answer.append((feed_title, articles))
    return answer
def preprocess_html(self, soup):
    """Rebuild the article as a fresh document and return it.

    The pieces are appended to the body of the soup returned by
    ``getFreshSoup`` in this order: headline (div ``DVMTIT``, renamed to
    ``h1``), the first image of the following ``DVTOP`` div, publication
    date, byline (div ``DVTIT``) and story (div ``DVDET``). Pieces that
    are not found are left out.
    """
    freshSoup = self.getFreshSoup(soup)

    headline = soup.find('div', attrs = {'id': 'DVMTIT'})
    if headline:
        div = headline.findNext('div', attrs = {'id': 'DVTOP'})
        img = None
        if div:
            img = div.find('img')
        headline.name = 'h1'
        freshSoup.body.append(headline)
        if img is not None:
            freshSoup.body.append(img)

    byline = soup.find('div', attrs = {'id': 'DVTIT'})
    if byline:
        # Bind date up front: it is tested below even when the byline has
        # no <span>, which previously raised UnboundLocalError.
        date = None
        date_el = byline.find('span')
        if date_el:
            pub_date = self.tag_to_string(date_el)
            date = Tag(soup, 'div', attrs = [('class', 'yemen_date')])
            date.append(pub_date)
            # Take the span out so its text is not repeated in the byline.
            date_el.extract()
        raw = '<br/>'.join(['%s' % (part) for part in byline.findAll(text = True)])
        author = BeautifulSoup('<div class="yemen_byline">' + raw + '</div>')
        if date is not None:
            freshSoup.body.append(date)
        freshSoup.body.append(author)

    story = soup.find('div', attrs = {'id': 'DVDET'})
    if story:
        # Tables that contain an image get the caption class from extra_css.
        for table in story.findAll('table'):
            if table.find('img'):
                table['class'] = 'yemen_caption'
        freshSoup.body.append(story)

    return freshSoup
def getFreshSoup(self, oldSoup):
    """Return an empty html/head/title/body skeleton whose title holds the
    text of ``oldSoup``'s title, when ``oldSoup`` has one."""
    skeleton = BeautifulSoup('<html><head><title></title></head><body></body></html>')
    old_title = oldSoup.head.title
    if old_title:
        skeleton.head.title.append(self.tag_to_string(old_title))
    return skeleton

View File

@ -2,11 +2,12 @@ from __future__ import with_statement
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import os, sys, tempfile, zipfile import atexit, os, shutil, sys, tempfile, zipfile
from calibre.constants import numeric_version from calibre.constants import numeric_version
from calibre.ptempfile import PersistentTemporaryFile from calibre.ptempfile import PersistentTemporaryFile
class Plugin(object): class Plugin(object):
''' '''
A calibre plugin. Useful members include: A calibre plugin. Useful members include:
@ -231,6 +232,8 @@ class CatalogPlugin(Plugin):
A plugin that implements a catalog generator. A plugin that implements a catalog generator.
''' '''
resources_path = None
#: Output file type for which this plugin should be run #: Output file type for which this plugin should be run
#: For example: 'epub' or 'xml' #: For example: 'epub' or 'xml'
file_types = set([]) file_types = set([])
@ -249,22 +252,18 @@ class CatalogPlugin(Plugin):
cli_options = [] cli_options = []
def cleanup(self, path):
try:
import os, shutil
if os.path.exists(path):
shutil.rmtree(path)
except:
pass
def search_sort_db(self, db, opts): def search_sort_db(self, db, opts):
if opts.search_text:
# If declared, --ids overrides any declared search criteria
if not opts.ids and opts.search_text:
db.search(opts.search_text) db.search(opts.search_text)
if opts.sort_by: if opts.sort_by:
# 2nd arg = ascending # 2nd arg = ascending
db.sort(opts.sort_by, True) db.sort(opts.sort_by, True)
return db.get_data_as_dict() return db.get_data_as_dict(ids=opts.ids)
def get_output_fields(self, opts): def get_output_fields(self, opts):
# Return a list of requested fields, with opts.sort_by first # Return a list of requested fields, with opts.sort_by first
@ -280,8 +279,10 @@ class CatalogPlugin(Plugin):
fields = list(all_fields & requested_fields) fields = list(all_fields & requested_fields)
else: else:
fields = list(all_fields) fields = list(all_fields)
fields.sort() fields.sort()
fields.insert(0,fields.pop(int(fields.index(opts.sort_by)))) if opts.sort_by:
fields.insert(0,fields.pop(int(fields.index(opts.sort_by))))
return fields return fields
def initialize(self): def initialize(self):
@ -291,35 +292,27 @@ class CatalogPlugin(Plugin):
Tab will be dynamically generated and added to the Catalog Options dialog in Tab will be dynamically generated and added to the Catalog Options dialog in
calibre.gui2.dialogs.catalog.py:Catalog calibre.gui2.dialogs.catalog.py:Catalog
''' '''
import atexit
from calibre.customize.builtins import plugins as builtin_plugins from calibre.customize.builtins import plugins as builtin_plugins
from calibre.customize.ui import config
from calibre.ptempfile import PersistentTemporaryDirectory
if type(self) in builtin_plugins: if not type(self) in builtin_plugins and \
print "%s: Built-in Catalog plugin, no init necessary" % self.name not self.name in config['disabled_plugins']:
else:
print "%s: User-added plugin" % self.name
print " Copying .ui and .py resources from %s to tmpdir" % self.plugin_path
# Generate a list of resource files to extract from the zipped plugin
# Copy to tmpdir/calibre_plugin_resources
files_to_copy = ["%s.%s" % (self.name.lower(),ext) for ext in ["ui","py"]] files_to_copy = ["%s.%s" % (self.name.lower(),ext) for ext in ["ui","py"]]
print " files_to_copy: %s" % files_to_copy
resources = zipfile.ZipFile(self.plugin_path,'r') resources = zipfile.ZipFile(self.plugin_path,'r')
temp_resources_path = os.path.join(tempfile.gettempdir(),'calibre_plugin_resources')
if self.resources_path is None:
self.resources_path = PersistentTemporaryDirectory('_plugin_resources', prefix='')
for file in files_to_copy: for file in files_to_copy:
try: try:
resources.extract(file, temp_resources_path) resources.extract(file, self.resources_path)
print " %s extracted to %s" % (file, temp_resources_path)
except: except:
print " %s not found in %s" % (file, os.path.basename(self.plugin_path)) print " customize:__init__.initialize(): %s not found in %s" % (file, os.path.basename(self.plugin_path))
continue
resources.close() resources.close()
# Register temp_resources_path for deletion when calibre exits def run(self, path_to_output, opts, db, ids):
atexit.register(self.cleanup, temp_resources_path)
def run(self, path_to_output, opts, db):
''' '''
Run the plugin. Must be implemented in subclasses. Run the plugin. Must be implemented in subclasses.
It should generate the catalog in the format specified It should generate the catalog in the format specified

View File

@ -14,6 +14,7 @@ Windows PNP strings:
2W00000&1', 3, u'G:\\') 2W00000&1', 3, u'G:\\')
''' '''
import re
from calibre.devices.usbms.driver import USBMS from calibre.devices.usbms.driver import USBMS
@ -108,6 +109,7 @@ class POCKETBOOK360(EB600):
OSX_MAIN_MEM = 'Philips Mass Storge Media' OSX_MAIN_MEM = 'Philips Mass Storge Media'
OSX_CARD_A_MEM = 'Philips Mass Storge Media' OSX_CARD_A_MEM = 'Philips Mass Storge Media'
OSX_MAIN_MEM_VOL_PAT = re.compile(r'/Pocket')
@classmethod @classmethod
def can_handle(cls, dev, debug=False): def can_handle(cls, dev, debug=False):

View File

@ -128,6 +128,10 @@ def do_set_metadata(opts, mi, stream, stream_type):
mi.title_sort = title_sort(opts.title) mi.title_sort = title_sort(opts.title)
if getattr(opts, 'tags', None) is not None: if getattr(opts, 'tags', None) is not None:
mi.tags = [t.strip() for t in opts.tags.split(',')] mi.tags = [t.strip() for t in opts.tags.split(',')]
if getattr(opts, 'series', None) is not None:
mi.series = opts.series.strip()
if getattr(opts, 'series_index', None) is not None:
mi.series_index = float(opts.series_index.strip())
if getattr(opts, 'cover', None) is not None: if getattr(opts, 'cover', None) is not None:
ext = os.path.splitext(opts.cover)[1].replace('.', '').upper() ext = os.path.splitext(opts.cover)[1].replace('.', '').upper()

View File

@ -25,12 +25,14 @@ def get_document_info(stream):
while not found: while not found:
prefix = block[-6:] prefix = block[-6:]
block = prefix + stream.read(block_size) block = prefix + stream.read(block_size)
actual_block_size = len(block) - len(prefix)
if len(block) == len(prefix): if len(block) == len(prefix):
break break
idx = block.find(r'{\info') idx = block.find(r'{\info')
if idx >= 0: if idx >= 0:
found = True found = True
stream.seek(stream.tell() - block_size + idx - len(prefix)) pos = stream.tell() - actual_block_size + idx - len(prefix)
stream.seek(pos)
else: else:
if block.find(r'\sect') > -1: if block.find(r'\sect') > -1:
break break

View File

@ -90,7 +90,10 @@ class DetectStructure(object):
mark = etree.Element(XHTML('div'), style=page_break_after) mark = etree.Element(XHTML('div'), style=page_break_after)
else: # chapter_mark == 'both': else: # chapter_mark == 'both':
mark = etree.Element(XHTML('hr'), style=page_break_before) mark = etree.Element(XHTML('hr'), style=page_break_before)
elem.addprevious(mark) try:
elem.addprevious(mark)
except TypeError:
self.log.exception('Failed to mark chapter')
def create_level_based_toc(self): def create_level_based_toc(self):
if self.opts.level1_toc is None: if self.opts.level1_toc is None:

View File

@ -20,6 +20,10 @@ class Font(object):
class Column(object): class Column(object):
# A column contains an element if the element bulges out to
# the left or the right by at most HFUZZ*col width.
HFUZZ = 0.2
def __init__(self): def __init__(self):
self.left = self.right = self.top = self.bottom = 0 self.left = self.right = self.top = self.bottom = 0
self.width = self.height = 0 self.width = self.height = 0
@ -41,6 +45,10 @@ class Column(object):
for x in self.elements: for x in self.elements:
yield x yield x
def contains(self, elem):
return elem.left > self.left - self.HFUZZ*self.width and \
elem.right < self.right + self.HFUZZ*self.width
class Element(object): class Element(object):
def __eq__(self, other): def __eq__(self, other):
@ -238,11 +246,10 @@ class Page(object):
return columns return columns
def find_elements_in_row_of(self, x): def find_elements_in_row_of(self, x):
interval = Interval(x.top - self.YFUZZ * self.average_text_height, interval = Interval(x.top,
x.top + self.YFUZZ*(1+self.average_text_height)) x.top + self.YFUZZ*(1+self.average_text_height))
h_interval = Interval(x.left, x.right) h_interval = Interval(x.left, x.right)
m = max(0, x.idx-15) for y in self.elements[x.idx:x.idx+15]:
for y in self.elements[m:x.idx+15]:
if y is not x: if y is not x:
y_interval = Interval(y.top, y.bottom) y_interval = Interval(y.top, y.bottom)
x_interval = Interval(y.left, y.right) x_interval = Interval(y.left, y.right)

View File

@ -169,6 +169,21 @@ class RTFInput(InputFormatPlugin):
with open('styles.css', 'ab') as f: with open('styles.css', 'ab') as f:
f.write(css) f.write(css)
def preprocess(self, fname):
self.log('\tPreprocessing to convert unicode characters')
try:
data = open(fname, 'rb').read()
from calibre.ebooks.rtf.preprocess import RtfTokenizer, RtfTokenParser
tokenizer = RtfTokenizer(data)
tokens = RtfTokenParser(tokenizer.tokens)
data = tokens.toRTF()
fname = 'preprocessed.rtf'
with open(fname, 'wb') as f:
f.write(data)
except:
self.log.exception(
'Failed to preprocess RTF to convert unicode sequences, ignoring...')
return fname
def convert(self, stream, options, file_ext, log, def convert(self, stream, options, file_ext, log,
accelerators): accelerators):
@ -177,8 +192,9 @@ class RTFInput(InputFormatPlugin):
from calibre.ebooks.rtf2xml.ParseRtf import RtfInvalidCodeException from calibre.ebooks.rtf2xml.ParseRtf import RtfInvalidCodeException
self.log = log self.log = log
self.log('Converting RTF to XML...') self.log('Converting RTF to XML...')
fname = self.preprocess(stream.name)
try: try:
xml = self.generate_xml(stream.name) xml = self.generate_xml(fname)
except RtfInvalidCodeException: except RtfInvalidCodeException:
raise ValueError(_('This RTF file has a feature calibre does not ' raise ValueError(_('This RTF file has a feature calibre does not '
'support. Convert it to HTML first and then try it.')) 'support. Convert it to HTML first and then try it.'))

View File

@ -0,0 +1,344 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2010, Gerendi Sandor Attila'
__docformat__ = 'restructuredtext en'
"""
RTF tokenizer and token parser. v.1.0 (1/17/2010)
Author: Gerendi Sandor Attila
At this point this will tokenize an RTF file and then rebuild it from the tokens.
In the process the UTF8 tokens are altered so that they are supported by RTF2XML and also remain compliant with the RTF specification.
"""
class tokenDelimitatorStart():
    """Token for a ``{`` group delimiter."""

    def __init__(self):
        pass

    def toRTF(self):
        # Plain literal (the same value as b'{' on Python 2) so the result
        # has the same type as __repr__ and can be "".join()-ed with the
        # text returned by the other token classes.
        return '{'

    def __repr__(self):
        return '{'
class tokenDelimitatorEnd():
    """Token for a ``}`` group delimiter."""

    def __init__(self):
        pass

    def toRTF(self):
        # Plain literal (the same value as b'}' on Python 2) so the result
        # has the same type as __repr__ and can be "".join()-ed with the
        # text returned by the other token classes.
        return '}'

    def __repr__(self):
        return '}'
class tokenControlWord():
    """Control word without a numeric argument.

    ``name`` is the control word text and ``separator`` the delimiter text
    written right after it (``''`` by default).
    """

    def __init__(self, name, separator = ''):
        self.name, self.separator = name, separator

    def toRTF(self):
        return ''.join((self.name, self.separator))

    def __repr__(self):
        return ''.join((self.name, self.separator))
class tokenControlWordWithNumericArgument():
    """Control word followed by an integer argument.

    ``name`` is the control word text, ``argument`` the number and
    ``separator`` the delimiter text written after the number (``''`` by
    default).
    """

    def __init__(self, name, argument, separator = ''):
        self.name = name
        self.argument = argument
        self.separator = separator

    def toRTF(self):
        # str(), not repr(): on Python 2 repr() of a long ends with 'L',
        # which would be written into the RTF output.
        return self.name + str(self.argument) + self.separator

    def __repr__(self):
        return self.toRTF()
class tokenControlSymbol():
    """Control symbol; ``name`` holds its complete text."""

    def __init__(self, name):
        self.name = name

    def toRTF(self):
        symbol = self.name
        return symbol

    def __repr__(self):
        symbol = self.name
        return symbol
class tokenData():
    """Run of plain document text; ``data`` is written back unchanged."""

    def __init__(self, data):
        self.data = data

    def toRTF(self):
        text = self.data
        return text

    def __repr__(self):
        text = self.data
        return text
class tokenBinN():
    """``\\bin`` block.

    ``data`` is the payload; its length is written as the numeric argument
    of ``\\bin``, followed by ``separator`` and the payload itself.
    """

    def __init__(self, data, separator = ''):
        self.data, self.separator = data, separator

    def toRTF(self):
        header = "\\bin" + repr(len(self.data))
        return header + self.separator + self.data

    def __repr__(self):
        header = "\\bin" + repr(len(self.data))
        return header + self.separator + self.data
class token8bitChar():
    """``\\'`` escape; ``data`` holds the characters written after it."""

    def __init__(self, data):
        self.data = data

    def toRTF(self):
        return "".join(("\\'", self.data))

    def __repr__(self):
        return "".join(("\\'", self.data))
class tokenUnicode():
    """``\\u`` unicode character together with its fallback tokens.

    ``data`` is the numeric code, ``current_ucn`` the ``\\uc`` count in
    force where the character appeared and ``eqList`` the fallback tokens
    (objects with a ``toRTF()`` method) to emit after it.
    """

    def __init__(self, data, separator = '', current_ucn = 1, eqList = None):
        self.data = data
        self.separator = separator
        self.current_ucn = current_ucn
        # A fresh list per instance; a shared [] default would leak
        # fallback tokens from one character to the next.
        self.eqList = [] if eqList is None else eqList

    def toRTF(self):
        # str(), not repr(): on Python 2 repr() of a long ends with 'L'.
        result = '\\u' + str(self.data) + ' '
        ucn = self.current_ucn
        if len(self.eqList) < ucn:
            # Fewer fallbacks than announced: declare the real count so a
            # reader does not skip text that belongs to the document.
            ucn = len(self.eqList)
            result = '\\uc' + str(ucn) + result
        # Emit at most ucn fallback tokens (the original loop counter was
        # never advanced, so the limit had no effect).
        for eq in self.eqList[:max(ucn, 0)]:
            result = result + eq.toRTF()
        return result

    def __repr__(self):
        return '\\u' + str(self.data)
def isAsciiLetter(value):
    """Return True when ``value`` compares within 'a'..'z' or 'A'..'Z'."""
    if 'a' <= value <= 'z':
        return True
    return 'A' <= value <= 'Z'
def isDigit(value):
    """Return True when ``value`` compares within '0'..'9'."""
    return '0' <= value <= '9'
def isChar(value, char):
    """Return True when ``value`` equals ``char``."""
    matches = (char == value)
    return matches
def isString(buffer, string):
    """Return True when ``buffer`` equals ``string``."""
    matches = (string == buffer)
    return matches
class RtfTokenParser():
    """Post-process a token list as produced by ``RtfTokenizer``.

    The constructor runs two passes over ``tokens``: ``process()`` builds
    the ``token8bitChar`` tokens and ``processUnicode()`` builds the
    ``tokenUnicode`` tokens. The resulting list is left in ``self.tokens``
    and ``toRTF()`` joins it back into RTF text.
    """

    def __init__(self, tokens):
        self.tokens = tokens
        self.process()
        self.processUnicode()

    def process(self):
        """Merge each ``\\'`` control symbol with the two characters that
        follow it into a single ``token8bitChar``.

        Raises ``BaseException`` when the symbol is not followed by a data
        token holding at least two characters.
        """
        i = 0
        newTokens = []
        total = len(self.tokens)
        while i < total:
            token = self.tokens[i]
            if isinstance(token, tokenControlSymbol) and isString(token.name, "\\'"):
                i = i + 1
                # The bounds test makes a trailing \' report the intended
                # error instead of an IndexError.
                if i >= total or not isinstance(self.tokens[i], tokenData):
                    raise BaseException('Error: token8bitChar without data.')
                payload = self.tokens[i].data
                if len(payload) < 2:
                    raise BaseException('Error: token8bitChar without data.')
                newTokens.append(token8bitChar(payload[0:2]))
                if len(payload) > 2:
                    # Whatever follows the two characters stays plain data.
                    newTokens.append(tokenData(payload[2:]))
                i = i + 1
                continue
            newTokens.append(token)
            i = i + 1
        self.tokens = list(newTokens)

    def _skipUnicodeReplacement(self, i, ucn):
        """Step over the fallback that follows a ``\\u`` token.

        Starting at token index ``i``, consume up to ``ucn`` fallback
        characters and stop early at a group delimiter. Returns
        ``(next_index, partialData)`` where ``partialData`` is a
        ``tokenData`` with the unconsumed tail of a partly consumed data
        token, or ``None``.

        Raises ``BaseException`` when any other kind of token is met
        before the count is reached.
        """
        consumed = 0
        partialData = None
        total = len(self.tokens)
        while (i < total) and (consumed < ucn):
            token = self.tokens[i]
            if isinstance(token, (tokenDelimitatorStart, tokenDelimitatorEnd)):
                break
            if isinstance(token, tokenData):
                needed = ucn - consumed
                if len(token.data) >= needed:
                    if len(token.data) > needed:
                        partialData = tokenData(token.data[needed:])
                    i = i + 1
                    break
                consumed = consumed + len(token.data)
                i = i + 1
                continue
            if isinstance(token, (token8bitChar, tokenBinN)):
                i = i + 1
                consumed = consumed + 1
                continue
            raise BaseException('Error: incorect utf replacement.')
        return i, partialData

    def processUnicode(self):
        """Replace every ``\\u`` control word with a ``tokenUnicode``.

        A stack tracks the ``\\uc`` value per ``{`` ... ``}`` group. The
        fallback characters after each ``\\u`` are dropped because calibre
        rtf2xml does not support them.
        """
        i = 0
        newTokens = []
        ucNbStack = [1]
        total = len(self.tokens)
        while i < total:
            token = self.tokens[i]
            if isinstance(token, tokenDelimitatorStart):
                # A new group starts with the enclosing group's value.
                ucNbStack.append(ucNbStack[-1])
                newTokens.append(token)
                i = i + 1
                continue
            if isinstance(token, tokenDelimitatorEnd):
                # Never pop the document-level entry: an unbalanced '}'
                # would otherwise empty the stack and fail later with an
                # IndexError.
                if len(ucNbStack) > 1:
                    ucNbStack.pop()
                newTokens.append(token)
                i = i + 1
                continue
            if isinstance(token, tokenControlWordWithNumericArgument):
                if isString(token.name, '\\uc'):
                    ucNbStack[-1] = token.argument
                    newTokens.append(token)
                    i = i + 1
                    continue
                if isString(token.name, '\\u'):
                    i, partialData = self._skipUnicodeReplacement(i + 1, ucNbStack[-1])
                    #calibre rtf2xml does not support utfreplace
                    newTokens.append(tokenUnicode(token.argument, token.separator, ucNbStack[-1], []))
                    if partialData is not None:
                        newTokens.append(partialData)
                    continue
            newTokens.append(token)
            i = i + 1
        self.tokens = list(newTokens)

    def toRTF(self):
        """Return the RTF text of all tokens, concatenated in order."""
        return "".join([token.toRTF() for token in self.tokens])
class RtfTokenizer():
    """Split RTF text into a flat list of token objects.

    The constructor tokenizes ``rtfData`` immediately and leaves the
    result in ``self.tokens``; ``toRTF()`` joins the tokens back into RTF
    text.
    """

    def __init__(self, rtfData):
        self.rtfData = rtfData
        self.tokens = []
        self.tokenize()

    def _flushData(self, start, end):
        """Append ``rtfData[start:end]`` as a ``tokenData`` when a data run
        is open, i.e. when ``start`` is not negative."""
        if start > -1:
            self.tokens.append(tokenData(self.rtfData[start : end]))

    def _readControlWord(self, tokenStart, i):
        """Tokenize the control word whose backslash is at ``tokenStart``.

        ``i`` is the index of the first letter. Appends a
        ``tokenControlWord``, ``tokenControlWordWithNumericArgument`` or
        ``tokenBinN`` and returns the index of the first character after
        the control word, its delimiter and, for ``\\bin``, its payload.

        Raises ``BaseException`` when the document ends inside the control
        word, when the argument has more than 10 digits, or when a
        ``\\bin`` payload runs past the end of the document.
        """
        data = self.rtfData
        size = len(data)

        #consume <ASCII Letter Sequence>
        while i < size and isAsciiLetter(data[i]):
            i = i + 1
        if i >= size:
            raise BaseException('Error (at:%d): Control Word without end.'%(tokenStart))
        tokenEnd = i

        #we have numeric argument before delimiter
        if isChar(data[i], '-') or isDigit(data[i]):
            if isChar(data[i], '-'):
                # The sign is part of the argument; without this step a
                # negative value such as \u-1234 was never recognised.
                i = i + 1
            digits = 0
            while i < size and isDigit(data[i]):
                digits = digits + 1
                i = i + 1
                if digits > 10:
                    raise BaseException('Error (at:%d): Too many digits in control word numeric argument.'%(tokenStart))
            if i >= size:
                raise BaseException('Error (at:%d): Control Word without numeric argument end.'%(tokenStart))
            if digits == 0:
                # A lone '-' is not an argument; leave it to the data run.
                i = tokenEnd

        separator = ''
        if isChar(data[i], ' '):
            separator = ' '

        controlWord = data[tokenStart: tokenEnd]
        if tokenEnd < i:
            value = int(data[tokenEnd: i])
            if isString(controlWord, "\\bin") and value >= 0:
                # The payload is the value characters that follow the
                # optional space delimiter; only the payload is stored
                # because tokenBinN.toRTF() writes the \binN prefix itself.
                dataStart = i + len(separator)
                dataEnd = dataStart + value
                if dataEnd > size:
                    raise BaseException('Error (at:%d): Binary data runs past the end of the document.'%(tokenStart))
                self.tokens.append(tokenBinN(data[dataStart : dataEnd], separator))
                return dataEnd
            self.tokens.append(tokenControlWordWithNumericArgument(controlWord, value, separator))
        else:
            self.tokens.append(tokenControlWord(controlWord, separator))

        #space delimiter, we should discard it
        return i + len(separator)

    def tokenize(self):
        """Scan ``self.rtfData`` and fill ``self.tokens``.

        Raises ``BaseException`` when the document ends with a lone
        backslash; see ``_readControlWord`` for the control word errors.
        """
        data = self.rtfData
        size = len(data)
        i = 0
        lastDataStart = -1
        while i < size:
            if isChar(data[i], '{'):
                self._flushData(lastDataStart, i)
                lastDataStart = -1
                self.tokens.append(tokenDelimitatorStart())
                i = i + 1
                continue

            if isChar(data[i], '}'):
                self._flushData(lastDataStart, i)
                lastDataStart = -1
                self.tokens.append(tokenDelimitatorEnd())
                i = i + 1
                continue

            if isChar(data[i], '\\'):
                if i + 1 >= size:
                    raise BaseException('Error: Control character found at the end of the document.')
                self._flushData(lastDataStart, i)
                lastDataStart = -1
                tokenStart = i
                i = i + 1
                if isAsciiLetter(data[i]):
                    #Control Words
                    i = self._readControlWord(tokenStart, i)
                else:
                    #Control Symbol
                    self.tokens.append(tokenControlSymbol(data[tokenStart : i + 1]))
                    i = i + 1
                continue

            if lastDataStart < 0:
                lastDataStart = i
            i = i + 1

        # Text after the last brace or control word used to be dropped.
        self._flushData(lastDataStart, size)

    def toRTF(self):
        """Return the RTF text of all tokens, concatenated in order."""
        return "".join([token.toRTF() for token in self.tokens])
# Command-line use: rewrite the given RTF file in place after running it
# through the tokenizer and the token parser.
if __name__ == "__main__":
    import sys
    if len(sys.argv) < 2:
        # print does not expand optparse's %prog placeholder, so the script
        # name is inserted explicitly; a usage error exits with status 1.
        print ("Usage %s rtfFileToConvert" % sys.argv[0])
        sys.exit(1)
    with open(sys.argv[1], 'rb') as f:
        data = f.read()
    tokenizer = RtfTokenizer(data)
    parsedTokens = RtfTokenParser(tokenizer.tokens)
    data = parsedTokens.toRTF()
    # Written in binary mode, matching the read, so that line endings are
    # not translated on the way out.
    with open(sys.argv[1], 'wb') as f:
        f.write(data)

View File

@ -10,11 +10,12 @@ from PyQt4.QtGui import QFileDialog, QMessageBox, QPixmap, QFileIconProvider, \
ORG_NAME = 'KovidsBrain' ORG_NAME = 'KovidsBrain'
APP_UID = 'libprs500' APP_UID = 'libprs500'
from calibre import islinux, iswindows, isosx from calibre import islinux, iswindows, isosx
from calibre.utils.config import Config, ConfigProxy, dynamic from calibre.utils.config import Config, ConfigProxy, dynamic, JSONConfig
from calibre.utils.localization import set_qt_translator from calibre.utils.localization import set_qt_translator
from calibre.ebooks.metadata.meta import get_metadata, metadata_from_formats from calibre.ebooks.metadata.meta import get_metadata, metadata_from_formats
from calibre.ebooks.metadata import MetaInformation from calibre.ebooks.metadata import MetaInformation
gprefs = JSONConfig('gui')
NONE = QVariant() #: Null value to return from the data function of item models NONE = QVariant() #: Null value to return from the data function of item models

View File

@ -4,10 +4,14 @@ __license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>' __copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import os
from optparse import OptionParser
from calibre.customize.conversion import OptionRecommendation, DummyReporter from calibre.customize.conversion import OptionRecommendation, DummyReporter
from calibre.ebooks.conversion.plumber import Plumber from calibre.ebooks.conversion.plumber import Plumber
# ?from calibre.library.catalog import Catalog from calibre.customize.ui import plugin_for_catalog_format
from calibre.utils.logging import Log from calibre.utils.logging import Log
from calibre.gui2 import choose_dir, Application
def gui_convert(input, output, recommendations, notification=DummyReporter(), def gui_convert(input, output, recommendations, notification=DummyReporter(),
abort_after_input_dump=False, log=None): abort_after_input_dump=False, log=None):
@ -21,7 +25,7 @@ def gui_convert(input, output, recommendations, notification=DummyReporter(),
plumber.run() plumber.run()
def gui_catalog(fmt, title, dbspec, ids, out_file_name, def gui_catalog(fmt, title, dbspec, ids, out_file_name, fmt_options,
notification=DummyReporter(), log=None): notification=DummyReporter(), log=None):
if log is None: if log is None:
log = Log() log = Log()
@ -33,19 +37,25 @@ def gui_catalog(fmt, title, dbspec, ids, out_file_name,
else: # To be implemented in the future else: # To be implemented in the future
pass pass
# Implement the interface to the catalog generating code here # Create a minimal OptionParser that we can append to
#db parser = OptionParser()
log("gui2.convert.gui_conversion:gui_catalog()") args = []
log("fmt: %s" % fmt) parser.add_option("--verbose", action="store_true", dest="verbose", default=True)
log("title: %s" % title) opts, args = parser.parse_args()
log("dbspec: %s" % dbspec)
log("ids: %s" % ids) # Populate opts
log("out_file_name: %s" % out_file_name) opts.ids = ids
opts.search_text = None
opts.sort_by = None
# Extract the option dictionary to comma-separated lists
for option in fmt_options:
setattr(opts,option, ','.join(fmt_options[option]))
# Fetch and run the plugin for fmt
plugin = plugin_for_catalog_format(fmt)
plugin.run(out_file_name, opts, db)
# This needs to call the .run() method of the plugin associated with fmt
# Needs to set up options before the call
# catalog = Catalog(out_file_name, options, dbspec)
# Can I call library.cli:catalog_option_parser()?

View File

@ -12,15 +12,18 @@ from PyQt4.Qt import QDialog, QWidget
from calibre.customize.ui import config from calibre.customize.ui import config
from calibre.gui2.dialogs.catalog_ui import Ui_Dialog from calibre.gui2.dialogs.catalog_ui import Ui_Dialog
from calibre.gui2 import dynamic from calibre.gui2 import gprefs, dynamic
from calibre.customize.ui import available_catalog_formats, catalog_plugins from calibre.customize.ui import available_catalog_formats, catalog_plugins
from calibre.gui2.catalog.catalog_csv_xml import PluginWidget from calibre.gui2.catalog.catalog_csv_xml import PluginWidget
class Catalog(QDialog, Ui_Dialog): class Catalog(QDialog, Ui_Dialog):
''' Catalog Dialog builder'''
widgets = []
def __init__(self, parent, dbspec, ids): def __init__(self, parent, dbspec, ids):
import re, cStringIO import re, cStringIO
from calibre import prints as info from calibre import prints as info
from calibre.gui2 import dynamic
from PyQt4.uic import compileUi from PyQt4.uic import compileUi
QDialog.__init__(self, parent) QDialog.__init__(self, parent)
@ -42,6 +45,7 @@ class Catalog(QDialog, Ui_Dialog):
self.fmts = [] self.fmts = []
from calibre.customize.builtins import plugins as builtin_plugins from calibre.customize.builtins import plugins as builtin_plugins
from calibre.customize import CatalogPlugin
for plugin in catalog_plugins(): for plugin in catalog_plugins():
if plugin.name in config['disabled_plugins']: if plugin.name in config['disabled_plugins']:
@ -49,38 +53,30 @@ class Catalog(QDialog, Ui_Dialog):
name = plugin.name.lower().replace(' ', '_') name = plugin.name.lower().replace(' ', '_')
if type(plugin) in builtin_plugins: if type(plugin) in builtin_plugins:
info("Adding tab for builtin Catalog plugin %s" % plugin.name) #info("Adding widget for builtin Catalog plugin %s" % plugin.name)
try: try:
catalog_widget = __import__('calibre.gui2.catalog.'+name, catalog_widget = __import__('calibre.gui2.catalog.'+name,
fromlist=[1]) fromlist=[1])
pw = catalog_widget.PluginWidget() pw = catalog_widget.PluginWidget()
pw.initialize() pw.initialize(name)
pw.ICON = I('forward.svg') pw.ICON = I('forward.svg')
page = self.tabs.addTab(pw,pw.TITLE) self.widgets.append(pw)
[self.fmts.append([file_type, pw.sync_enabled]) for file_type in plugin.file_types] [self.fmts.append([file_type.upper(), pw.sync_enabled,pw]) for file_type in plugin.file_types]
info("\tSupported formats: %s" % plugin.file_types)
info("\tsync_enabled: %s" % pw.sync_enabled)
except ImportError: except ImportError:
info("ImportError with %s" % name) info("ImportError with %s" % name)
continue continue
else: else:
# Test to see if .ui and .py files exist in tmpdir/calibre_plugin_resources # Load dynamic tab
form = os.path.join(tempfile.gettempdir(), form = os.path.join(plugin.resources_path,'%s.ui' % name)
'calibre_plugin_resources','%s.ui' % name) klass = os.path.join(plugin.resources_path,'%s.py' % name)
klass = os.path.join(tempfile.gettempdir(), compiled_form = os.path.join(plugin.resources_path,'%s_ui.py' % name)
'calibre_plugin_resources','%s.py' % name)
compiled_form = os.path.join(tempfile.gettempdir(),
'calibre_plugin_resources','%s_ui.py' % name)
plugin_resources = os.path.join(tempfile.gettempdir(),'calibre_plugin_resources')
if os.path.exists(form) and os.path.exists(klass): if os.path.exists(form) and os.path.exists(klass):
info("Adding tab for user-installed Catalog plugin %s" % plugin.name) #info("Adding widget for user-installed Catalog plugin %s" % plugin.name)
# Compile the form provided in plugin.zip # Compile the .ui form provided in plugin.zip
if not os.path.exists(compiled_form) or \ if not os.path.exists(compiled_form):
os.stat(form).st_mtime > os.stat(compiled_form).st_mtime: # info('\tCompiling form', form)
info('\tCompiling form', form)
buf = cStringIO.StringIO() buf = cStringIO.StringIO()
compileUi(form, buf) compileUi(form, buf)
dat = buf.getvalue() dat = buf.getvalue()
@ -88,35 +84,41 @@ class Catalog(QDialog, Ui_Dialog):
re.DOTALL).sub(r'_("\1")', dat) re.DOTALL).sub(r'_("\1")', dat)
open(compiled_form, 'wb').write(dat) open(compiled_form, 'wb').write(dat)
# Import the Catalog class from the dynamic .py file # Import the dynamic PluginWidget() from .py file provided in plugin.zip
try: try:
sys.path.insert(0, plugin_resources) sys.path.insert(0, plugin.resources_path)
catalog_widget = __import__(name, fromlist=[1]) catalog_widget = __import__(name, fromlist=[1])
dpw = catalog_widget.PluginWidget() pw = catalog_widget.PluginWidget()
dpw.initialize() pw.initialize(name)
dpw.ICON = I('forward.svg') pw.ICON = I('forward.svg')
page = self.tabs.addTab(dpw, dpw.TITLE) self.widgets.append(pw)
[self.fmts.append([file_type, dpw.sync_enabled]) for file_type in plugin.file_types] [self.fmts.append([file_type.upper(), pw.sync_enabled,pw]) for file_type in plugin.file_types]
info("\tSupported formats: %s" % plugin.file_types)
info("\tsync_enabled: %s" % dpw.sync_enabled)
except ImportError: except ImportError:
info("ImportError with %s" % name) info("ImportError with %s" % name)
continue continue
finally: finally:
sys.path.remove(plugin_resources) sys.path.remove(plugin.resources_path)
else: else:
info("No dynamic tab resources found for %s" % name) info("No dynamic tab resources found for %s" % name)
self.widgets = sorted(self.widgets, key=lambda x:(x.TITLE, x.TITLE))
for pw in self.widgets:
page = self.tabs.addTab(pw,pw.TITLE)
# Generate a sorted list of installed catalog formats/sync_enabled pairs # Generate a sorted list of installed catalog formats/sync_enabled pairs
# Generate a parallel list of sync_enabled[True|False]ß fmts = sorted([x[0] for x in self.fmts])
self.fmts = sorted([x[0].upper() for x in self.fmts])
self.sync_enabled_formats = []
for fmt in self.fmts:
if fmt[1]:
self.sync_enabled_formats.append(fmt[0])
# Callback when format changes # Callback when format changes
self.format.currentIndexChanged.connect(self.format_changed) self.format.currentIndexChanged.connect(self.format_changed)
# Add the installed catalog format list to the format QComboBox # Add the installed catalog format list to the format QComboBox
self.format.addItems(self.fmts) self.format.addItems(fmts)
pref = dynamic.get('catalog_preferred_format', 'CSV') pref = dynamic.get('catalog_preferred_format', 'CSV')
idx = self.format.findText(pref) idx = self.format.findText(pref)
@ -127,9 +129,8 @@ class Catalog(QDialog, Ui_Dialog):
self.sync.setChecked(dynamic.get('catalog_sync_to_device', True)) self.sync.setChecked(dynamic.get('catalog_sync_to_device', True))
def format_changed(self, idx): def format_changed(self, idx):
print "format_changed(idx): idx: %d" % idx
cf = unicode(self.format.currentText()) cf = unicode(self.format.currentText())
if cf in ('EPUB', 'MOBI'): if cf in self.sync_enabled_formats:
self.sync.setEnabled(True) self.sync.setEnabled(True)
else: else:
self.sync.setDisabled(True) self.sync.setDisabled(True)

View File

@ -239,23 +239,35 @@ def fetch_scheduled_recipe(arg):
def generate_catalog(parent, dbspec, ids): def generate_catalog(parent, dbspec, ids):
from calibre.gui2.dialogs.catalog import Catalog from calibre.gui2.dialogs.catalog import Catalog
# Build the Catalog dialog # Build the Catalog dialog in gui2.dialogs.catalog
d = Catalog(parent, dbspec, ids) d = Catalog(parent, dbspec, ids)
if d.exec_() != d.Accepted: if d.exec_() != d.Accepted:
return None return None
# Create the output file # Create the output file
out = PersistentTemporaryFile(suffix='_catalog_out.'+d.catalog_format.lower()) out = PersistentTemporaryFile(suffix='_catalog_out.'+d.catalog_format.lower())
# Retrieve plugin options
fmt_options = {}
for x in range(d.tabs.count()):
if str(d.tabs.tabText(x)).find(str(d.catalog_format)) > -1:
for fmt in d.fmts:
if fmt[0] == d.catalog_format:
fmt_options = fmt[2].options()
# print "gui2.tools:generate_catalog(): options for %s: %s" % (fmt[0], fmt_options)
args = [ args = [
d.catalog_format, d.catalog_format,
d.catalog_title, d.catalog_title,
dbspec, dbspec,
ids, ids,
out.name, out.name,
fmt_options
] ]
out.close() out.close()
# This calls gui2.convert.gui_conversion:gui_catalog()
return 'gui_catalog', args, _('Generate catalog'), out.name, d.catalog_sync, \ return 'gui_catalog', args, _('Generate catalog'), out.name, d.catalog_sync, \
d.catalog_title d.catalog_title

View File

@ -9,7 +9,7 @@ __docformat__ = 'restructuredtext en'
'''The main GUI''' '''The main GUI'''
import os, sys, textwrap, collections, time import atexit, os, shutil, sys, tempfile, textwrap, collections, time
from xml.parsers.expat import ExpatError from xml.parsers.expat import ExpatError
from Queue import Queue, Empty from Queue import Queue, Empty
from threading import Thread from threading import Thread
@ -31,7 +31,7 @@ from calibre.utils.ipc.server import Server
from calibre.gui2 import warning_dialog, choose_files, error_dialog, \ from calibre.gui2 import warning_dialog, choose_files, error_dialog, \
question_dialog,\ question_dialog,\
pixmap_to_data, choose_dir, \ pixmap_to_data, choose_dir, \
Dispatcher, \ Dispatcher, gprefs, \
available_height, \ available_height, \
max_available_height, config, info_dialog, \ max_available_height, config, info_dialog, \
available_width, GetMetadata available_width, GetMetadata
@ -357,7 +357,7 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
cm.addAction(_('Bulk convert')) cm.addAction(_('Bulk convert'))
cm.addSeparator() cm.addSeparator()
ac = cm.addAction( ac = cm.addAction(
_('Create catalog of the books in your calibre library')) _('Create catalog of books in your calibre library'))
ac.triggered.connect(self.generate_catalog) ac.triggered.connect(self.generate_catalog)
self.action_convert.setMenu(cm) self.action_convert.setMenu(cm)
self._convert_single_hook = partial(self.convert_ebook, bulk=False) self._convert_single_hook = partial(self.convert_ebook, bulk=False)
@ -518,7 +518,21 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
self.connect(self.library_view.model(), SIGNAL('count_changed(int)'), self.connect(self.library_view.model(), SIGNAL('count_changed(int)'),
self.tags_view.recount) self.tags_view.recount)
self.connect(self.search, SIGNAL('cleared()'), self.tags_view.clear) self.connect(self.search, SIGNAL('cleared()'), self.tags_view.clear)
if not gprefs.get('quick_start_guide_added', False):
from calibre.ebooks.metadata import MetaInformation
mi = MetaInformation(_('Calibre Quick Start Guide'), ['John Schember'])
mi.author_sort = 'Schember, John'
mi.comments = "A guide to get you up an running with calibre"
mi.publisher = 'calibre'
self.library_view.model().add_books([P('quick_start.epub')], ['epub'],
[mi])
gprefs['quick_start_guide_added'] = True
self.library_view.model().books_added(1)
if hasattr(self, 'db_images'):
self.db_images.reset()
self.library_view.model().count_changed() self.library_view.model().count_changed()
########################### Cover Flow ################################ ########################### Cover Flow ################################
self.cover_flow = None self.cover_flow = None
if CoverFlow is not None: if CoverFlow is not None:
@ -1008,7 +1022,6 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
return return
self._add_books(books, to_device) self._add_books(books, to_device)
def _add_books(self, paths, to_device, on_card=None): def _add_books(self, paths, to_device, on_card=None):
if on_card is None: if on_card is None:
on_card = 'carda' if self.stack.currentIndex() == 2 else 'cardb' if self.stack.currentIndex() == 3 else None on_card = 'carda' if self.stack.currentIndex() == 2 else 'cardb' if self.stack.currentIndex() == 3 else None
@ -1348,24 +1361,29 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
def generate_catalog(self): def generate_catalog(self):
rows = self.library_view.selectionModel().selectedRows() rows = self.library_view.selectionModel().selectedRows()
if not rows: if not rows or len(rows) < 2:
rows = xrange(self.library_view.model().rowCount(QModelIndex())) rows = xrange(self.library_view.model().rowCount(QModelIndex()))
ids = map(self.library_view.model().id, rows) ids = map(self.library_view.model().id, rows)
dbspec = None dbspec = None
if not ids: if not ids:
return error_dialog(self, _('No books selected'), return error_dialog(self, _('No books selected'),
_('No books selected to generate catalog for'), _('No books selected to generate catalog for'),
show=True) show=True)
# calibre.gui2.tools:generate_catalog()
# Calling gui2.tools:generate_catalog()
ret = generate_catalog(self, dbspec, ids) ret = generate_catalog(self, dbspec, ids)
if ret is None: if ret is None:
return return
func, args, desc, out, sync, title = ret func, args, desc, out, sync, title = ret
fmt = os.path.splitext(out)[1][1:].upper() fmt = os.path.splitext(out)[1][1:].upper()
job = self.job_manager.run_job( job = self.job_manager.run_job(
Dispatcher(self.catalog_generated), func, args=args, Dispatcher(self.catalog_generated), func, args=args,
description=desc) description=desc)
job.catalog_file_path = out job.catalog_file_path = out
job.fmt = fmt
job.catalog_sync, job.catalog_title = sync, title job.catalog_sync, job.catalog_title = sync, title
self.status_bar.showMessage(_('Generating %s catalog...')%fmt) self.status_bar.showMessage(_('Generating %s catalog...')%fmt)
@ -1380,7 +1398,12 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
dynamic.set('catalogs_to_be_synced', sync) dynamic.set('catalogs_to_be_synced', sync)
self.status_bar.showMessage(_('Catalog generated.'), 3000) self.status_bar.showMessage(_('Catalog generated.'), 3000)
self.sync_catalogs() self.sync_catalogs()
if job.fmt in ['CSV','XML']:
export_dir = choose_dir(self, 'Export Catalog Directory',
'Select destination for %s.%s' % (job.catalog_title, job.fmt.lower()))
if export_dir:
destination = os.path.join(export_dir, '%s.%s' % (job.catalog_title, job.fmt.lower()))
shutil.copyfile(job.catalog_file_path, destination)
############################### Fetch news ################################# ############################### Fetch news #################################

View File

@ -40,10 +40,9 @@ class CSV_XML(CatalogPlugin):
from calibre.utils.logging import Log from calibre.utils.logging import Log
log = Log() log = Log()
self.fmt = path_to_output[path_to_output.rfind('.') + 1:]
# Update to .partition
self.fmt = path_to_output.rpartition('.')[2] self.fmt = path_to_output.rpartition('.')[2]
if opts.verbose:
if False and opts.verbose:
log("%s:run" % self.name) log("%s:run" % self.name)
log(" path_to_output: %s" % path_to_output) log(" path_to_output: %s" % path_to_output)
log(" Output format: %s" % self.fmt) log(" Output format: %s" % self.fmt)

View File

@ -644,6 +644,10 @@ def catalog_option_parser(args):
output, fmt = validate_command_line(parser, args, log) output, fmt = validate_command_line(parser, args, log)
# Add options common to all catalog plugins # Add options common to all catalog plugins
parser.add_option('-i', '--ids', default=None, dest='ids',
help=_("Comma-separated list of database IDs to catalog.\n"
"If declared, --search is ignored.\n"
"Default: all"))
parser.add_option('-s', '--search', default=None, dest='search_text', parser.add_option('-s', '--search', default=None, dest='search_text',
help=_("Filter the results by the search query. " help=_("Filter the results by the search query. "
"For the format of the search query, please see " "For the format of the search query, please see "
@ -656,31 +660,6 @@ def catalog_option_parser(args):
# Add options specific to fmt plugin # Add options specific to fmt plugin
plugin = add_plugin_parser_options(fmt, parser, log) plugin = add_plugin_parser_options(fmt, parser, log)
# Merge options from GUI Preferences
'''
# Placeholder sample code until we implement GUI preferences
from calibre.library.save_to_disk import config
c = config()
for pref in ['asciiize', 'update_metadata', 'write_opf', 'save_cover']:
opt = c.get_option(pref)
switch = '--dont-'+pref.replace('_', '-')
parser.add_option(switch, default=True, action='store_false',
help=opt.help+' '+_('Specifying this switch will turn '
'this behavior off.'), dest=pref)
for pref in ['timefmt', 'template', 'formats']:
opt = c.get_option(pref)
switch = '--'+pref
parser.add_option(switch, default=opt.default,
help=opt.help, dest=pref)
for pref in ('replace_whitespace', 'to_lowercase'):
opt = c.get_option(pref)
switch = '--'+pref.replace('_', '-')
parser.add_option(switch, default=False, action='store_true',
help=opt.help)
'''
return parser, plugin, log return parser, plugin, log
def command_catalog(args, dbpath): def command_catalog(args, dbpath):
@ -693,6 +672,9 @@ def command_catalog(args, dbpath):
return 1 return 1
if opts.verbose: if opts.verbose:
log("library.cli:command_catalog dispatching to plugin %s" % plugin.name) log("library.cli:command_catalog dispatching to plugin %s" % plugin.name)
if opts.ids:
opts.ids = [int(id) for id in opts.ids.split(',')]
with plugin: with plugin:
plugin.run(args[1], opts, get_db(dbpath, opts)) plugin.run(args[1], opts, get_db(dbpath, opts))
return 0 return 0

View File

@ -1634,13 +1634,15 @@ class LibraryDatabase2(LibraryDatabase):
for i in iter(self): for i in iter(self):
yield i[x] yield i[x]
def get_data_as_dict(self, prefix=None, authors_as_string=False): def get_data_as_dict(self, prefix=None, authors_as_string=False, ids=None):
''' '''
Return all metadata stored in the database as a dict. Includes paths to Return all metadata stored in the database as a dict. Includes paths to
the cover and each format. the cover and each format.
:param prefix: The prefix for all paths. By default, the prefix is the absolute path :param prefix: The prefix for all paths. By default, the prefix is the absolute path
to the library folder. to the library folder.
:param ids: Set of ids to return the data for. If None return data for
all entries in database.
''' '''
if prefix is None: if prefix is None:
prefix = self.library_path prefix = self.library_path
@ -1650,11 +1652,14 @@ class LibraryDatabase2(LibraryDatabase):
data = [] data = []
for record in self.data: for record in self.data:
if record is None: continue if record is None: continue
db_id = record[FIELD_MAP['id']]
if ids is not None and db_id not in ids:
continue
x = {} x = {}
for field in FIELDS: for field in FIELDS:
x[field] = record[FIELD_MAP[field]] x[field] = record[FIELD_MAP[field]]
data.append(x) data.append(x)
x['id'] = record[FIELD_MAP['id']] x['id'] = db_id
x['formats'] = [] x['formats'] = []
if not x['authors']: if not x['authors']:
x['authors'] = _('Unknown') x['authors'] = _('Unknown')

View File

@ -6,7 +6,7 @@ __docformat__ = 'restructuredtext en'
''' '''
Manage application-wide preferences. Manage application-wide preferences.
''' '''
import os, re, cPickle, textwrap, traceback, plistlib import os, re, cPickle, textwrap, traceback, plistlib, json
from copy import deepcopy from copy import deepcopy
from functools import partial from functools import partial
from optparse import OptionParser as _OptionParser from optparse import OptionParser as _OptionParser
@ -564,23 +564,31 @@ class XMLConfig(dict):
data types. data types.
''' '''
EXTENSION = '.plist'
def __init__(self, rel_path_to_cf_file): def __init__(self, rel_path_to_cf_file):
dict.__init__(self) dict.__init__(self)
self.file_path = os.path.join(config_dir, self.file_path = os.path.join(config_dir,
*(rel_path_to_cf_file.split('/'))) *(rel_path_to_cf_file.split('/')))
self.file_path = os.path.abspath(self.file_path) self.file_path = os.path.abspath(self.file_path)
if not self.file_path.endswith('.plist'): if not self.file_path.endswith(self.EXTENSION):
self.file_path += '.plist' self.file_path += self.EXTENSION
self.refresh() self.refresh()
def raw_to_object(self, raw):
return plistlib.readPlistFromString(raw)
def to_raw(self):
return plistlib.writePlistToString(self)
def refresh(self): def refresh(self):
d = {} d = {}
if os.path.exists(self.file_path): if os.path.exists(self.file_path):
with ExclusiveFile(self.file_path) as f: with ExclusiveFile(self.file_path) as f:
raw = f.read() raw = f.read()
try: try:
d = plistlib.readPlistFromString(raw) if raw.strip() else {} d = self.raw_to_object(raw) if raw.strip() else {}
except SystemError: except SystemError:
pass pass
except: except:
@ -618,11 +626,21 @@ class XMLConfig(dict):
if not os.path.exists(dpath): if not os.path.exists(dpath):
os.makedirs(dpath, mode=CONFIG_DIR_MODE) os.makedirs(dpath, mode=CONFIG_DIR_MODE)
with ExclusiveFile(self.file_path) as f: with ExclusiveFile(self.file_path) as f:
raw = plistlib.writePlistToString(self) raw = self.to_raw()
f.seek(0) f.seek(0)
f.truncate() f.truncate()
f.write(raw) f.write(raw)
class JSONConfig(XMLConfig):
EXTENSION = '.json'
def raw_to_object(self, raw):
return json.loads(raw.decode('utf-8'))
def to_raw(self):
return json.dumps(self, indent=2)
def _prefs(): def _prefs():
c = Config('global', 'calibre wide preferences') c = Config('global', 'calibre wide preferences')

View File

@ -104,6 +104,7 @@ _extra_lang_codes = {
'en_CY' : _('English (Cyprus)'), 'en_CY' : _('English (Cyprus)'),
'en_PK' : _('English (Pakistan)'), 'en_PK' : _('English (Pakistan)'),
'en_SG' : _('English (Singapore)'), 'en_SG' : _('English (Singapore)'),
'en_YE' : _('English (Yemen)'),
'de_AT' : _('German (AT)'), 'de_AT' : _('German (AT)'),
'nl' : _('Dutch (NL)'), 'nl' : _('Dutch (NL)'),
'nl_BE' : _('Dutch (BE)'), 'nl_BE' : _('Dutch (BE)'),