mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
GwR initial release of Catalog features
This commit is contained in:
commit
3024d37142
BIN
resources/images/news/joop.png
Normal file
BIN
resources/images/news/joop.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 395 B |
BIN
resources/images/news/kitsapun.png
Normal file
BIN
resources/images/news/kitsapun.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 2.3 KiB |
BIN
resources/images/news/nrcnext.png
Normal file
BIN
resources/images/news/nrcnext.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 1.7 KiB |
BIN
resources/quick_start.epub
Normal file
BIN
resources/quick_start.epub
Normal file
Binary file not shown.
@ -1,7 +1,5 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
spectator.org
|
spectator.org
|
||||||
'''
|
'''
|
||||||
@ -11,20 +9,22 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
class TheAmericanSpectator(BasicNewsRecipe):
|
class TheAmericanSpectator(BasicNewsRecipe):
|
||||||
title = 'The American Spectator'
|
title = 'The American Spectator'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
language = 'en'
|
|
||||||
|
|
||||||
description = 'News from USA'
|
description = 'News from USA'
|
||||||
|
category = 'news, politics, USA, world'
|
||||||
|
publisher = 'The American Spectator'
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
|
language = 'en'
|
||||||
INDEX = 'http://spectator.org'
|
INDEX = 'http://spectator.org'
|
||||||
|
|
||||||
html2lrf_options = [
|
conversion_options = {
|
||||||
'--comment' , description
|
'comments' : description
|
||||||
, '--category' , 'news, politics, USA'
|
,'tags' : category
|
||||||
, '--publisher' , title
|
,'language' : language
|
||||||
]
|
,'publisher' : publisher
|
||||||
|
}
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name='div', attrs={'class':'post inner'})
|
dict(name='div', attrs={'class':'post inner'})
|
||||||
@ -33,13 +33,11 @@ class TheAmericanSpectator(BasicNewsRecipe):
|
|||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='object')
|
dict(name='object')
|
||||||
,dict(name='div', attrs={'class':'col3' })
|
,dict(name='div', attrs={'class':['col3','post-options','social']})
|
||||||
,dict(name='div', attrs={'class':'post-options' })
|
,dict(name='p' , attrs={'class':['letter-editor','meta']})
|
||||||
,dict(name='p' , attrs={'class':'letter-editor'})
|
|
||||||
,dict(name='div', attrs={'class':'social' })
|
|
||||||
]
|
]
|
||||||
|
|
||||||
feeds = [ (u'Articles', u'http://feedproxy.google.com/amspecarticles')]
|
feeds = [ (u'Articles', u'http://feeds.feedburner.com/amspecarticles')]
|
||||||
|
|
||||||
def get_cover_url(self):
|
def get_cover_url(self):
|
||||||
cover_url = None
|
cover_url = None
|
||||||
@ -53,3 +51,7 @@ class TheAmericanSpectator(BasicNewsRecipe):
|
|||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
return url + '/print'
|
return url + '/print'
|
||||||
|
|
||||||
|
def get_article_url(self, article):
    """Return the ``guid`` entry of *article*, or None when it has none.

    *article* only needs to support ``.get``; the ``guid`` value is handed
    back unchanged as the article URL.
    """
    guid = article.get('guid')
    return guid
|
||||||
|
|
||||||
|
41
resources/recipes/drivelry.recipe
Normal file
41
resources/recipes/drivelry.recipe
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||||
|
|
||||||
|
class drivelrycom(BasicNewsRecipe):
    """Recipe for the drivelry.com blog, read through its FeedBurner feed."""

    title = u'drivelry.com'
    language = 'en'
    description = 'A blog by Mike Abrahams'
    __author__ = 'Krittika Goyal'
    oldest_article = 60  # days
    max_articles_per_feed = 25

    # `no_stylesheets` is the option the other recipes in this commit use to
    # drop the site's CSS; the previous `remove_stylesheets` name is not a
    # documented BasicNewsRecipe option.
    no_stylesheets = True

    # Cut the page off after the bookmark widget, then drop the widget
    # itself together with iframes and the sidebar.
    remove_tags_after = dict(name='div', attrs={'id':'bookmark'})
    remove_tags = [
        dict(name='iframe'),
        dict(name='div', attrs={'class':['sidebar']}),
        dict(name='div', attrs={'id':['bookmark']}),
    ]

    feeds = [
        ('drivelry.com', 'http://feeds.feedburner.com/drivelry'),
    ]

    def preprocess_html(self, soup):
        """Rebuild the page as the ``div#main`` story followed by a donation link.

        Returns a fresh soup whose body starts with the story element taken
        from *soup*. When the page has no ``div#main`` the incoming soup is
        returned unchanged instead of failing on a None insert.
        """
        story = soup.find(name='div', attrs={'id':'main'})
        if story is None:
            return soup

        page = BeautifulSoup('''
        <html><head><title>t</title></head><body>
        <p>To donate to this blog: <a href="http://www.drivelry.com/thank-you/">click here</a></p>
        </body></html>
        ''')
        body = page.find(name='body')
        # Position 0 puts the story ahead of the donation paragraph.
        body.insert(0, story)
        return page
|
@ -1,23 +1,29 @@
|
|||||||
#!/usr/bin/python
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import Tag
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
||||||
|
|
||||||
|
|
||||||
class FokkeEnSukkeRecipe(BasicNewsRecipe) :
|
class FokkeEnSukkeRecipe(BasicNewsRecipe) :
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__author__ = 'kwetal'
|
__author__ = 'kwetal'
|
||||||
language = 'nl'
|
language = 'nl'
|
||||||
description = u'Popular Dutch daily cartoon Fokke en Sukke'
|
country = 'NL'
|
||||||
|
version = 2
|
||||||
|
|
||||||
title = u'Fokke en Sukke'
|
title = u'Fokke en Sukke'
|
||||||
no_stylesheets = True
|
publisher = u'Reid, Geleijnse & Van Tol'
|
||||||
# For reasons unknown to me the extra css is, on the cartoon pages, inserted in the <body> and not in the <head>. My reader (Sony PRS-600) has a serious issue
|
category = u'News, Cartoons'
|
||||||
# with that: it treats it as content and displays it as is. Setting this property to empty solves this for me.
|
description = u'Popular Dutch daily cartoon Fokke en Sukke'
|
||||||
template_css = ''
|
|
||||||
INDEX = u'http://foksuk.nl'
|
|
||||||
|
|
||||||
# This cover is not as nice as it could be, needs some work
|
conversion_options = {'comments': description, 'language': language, 'publisher': publisher}
|
||||||
#cover_url = 'http://foksuk.nl/content/wysiwyg/simpleimages/image350.gif'
|
|
||||||
|
no_stylesheets = True
|
||||||
|
extra_css = '''
|
||||||
|
body{font-family: verdana, arial, helvetica, geneva, sans-serif ; margin: 0em; padding: 0em;}
|
||||||
|
div.title {text-align: center; margin-bottom: 1em;}
|
||||||
|
'''
|
||||||
|
|
||||||
|
INDEX = u'http://foksuk.nl'
|
||||||
|
cover_url = 'http://foksuk.nl/content/wysiwyg/simpleimages/image350.gif'
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'class' : 'cartoon'})]
|
keep_only_tags = [dict(name='div', attrs={'class' : 'cartoon'})]
|
||||||
|
|
||||||
@ -31,15 +37,14 @@ class FokkeEnSukkeRecipe(BasicNewsRecipe) :
|
|||||||
links = index.findAll('a')
|
links = index.findAll('a')
|
||||||
maxIndex = len(links) - 1
|
maxIndex = len(links) - 1
|
||||||
articles = []
|
articles = []
|
||||||
for i in range(len(links)) :
|
for i in range(1, len(links)) :
|
||||||
# The first link does not interest us, as it points to no cartoon. A begin_at parameter in the range() function would be nice.
|
# There can be more than one cartoon for a given day (currently either one or two).
|
||||||
if i == 0 :
|
# If there's only one, there is just a link with the dayname.
|
||||||
continue
|
# If there are two, there are three links in sequence: <a>dayname</a> <a>1</a> <a>2</a>.
|
||||||
|
# In that case we're interested in the last two.
|
||||||
# There can be more than one cartoon for a given day (currently either one or two). If there's only one, there is just a link with the dayname.
|
|
||||||
# If there are two, there are three links in sequence: <a>dayname</a> <a>1</a> <a>2</a>. In that case we're interested in the last two.
|
|
||||||
if links[i].renderContents() in dayNames :
|
if links[i].renderContents() in dayNames :
|
||||||
# If the link is not in daynames, we processed it already, but if it is, let's see if the next one has '1' as content
|
# If the link is not in daynames, we processed it already, but if it is, let's see
|
||||||
|
# if the next one has '1' as content
|
||||||
if (i + 1 <= maxIndex) and (links[i + 1].renderContents() == '1') :
|
if (i + 1 <= maxIndex) and (links[i + 1].renderContents() == '1') :
|
||||||
# Got you! Add it to the list
|
# Got you! Add it to the list
|
||||||
article = {'title' : links[i].renderContents() + ' 1', 'date' : u'', 'url' : self.INDEX + links[i + 1]['href'], 'description' : ''}
|
article = {'title' : links[i].renderContents() + ' 1', 'date' : u'', 'url' : self.INDEX + links[i + 1]['href'], 'description' : ''}
|
||||||
@ -59,29 +64,31 @@ class FokkeEnSukkeRecipe(BasicNewsRecipe) :
|
|||||||
return [[week, articles]]
|
return [[week, articles]]
|
||||||
|
|
||||||
def preprocess_html(self, soup) :
|
def preprocess_html(self, soup) :
|
||||||
# This method is called for every page, be it cartoon or TOC. We need to process each in their own way
|
|
||||||
cartoon = soup.find('div', attrs={'class' : 'cartoon'})
|
cartoon = soup.find('div', attrs={'class' : 'cartoon'})
|
||||||
if cartoon :
|
|
||||||
# It is a cartoon. Extract the title.
|
|
||||||
title = ''
|
title = ''
|
||||||
img = soup.find('img', attrs = {'alt' : True})
|
img = soup.find('img', attrs = {'alt' : True})
|
||||||
if img :
|
if img :
|
||||||
title = img['alt']
|
title = img['alt']
|
||||||
|
|
||||||
# Using the 'extra_css' displays it in the <body> and not in the <head>. See comment at the top of this class. Setting the style this way solves that.
|
tag = Tag(soup, 'div', [('class', 'title')])
|
||||||
tag = Tag(soup, 'div', [('style', 'text-align: center; margin-bottom: 8px')])
|
|
||||||
tag.insert(0, title)
|
tag.insert(0, title)
|
||||||
cartoon.insert(0, tag)
|
cartoon.insert(0, tag)
|
||||||
|
|
||||||
# I have not quite worked out why, but we have to throw out this part of the page. It contains the very same index we processed earlier,
|
# We only want the cartoon, so throw out the index
|
||||||
# and Calibre does not like that too much. As far as I can tell it goes into recursion and the result is an empty eBook.
|
|
||||||
select = cartoon.find('div', attrs={'class' : 'selectcartoon'})
|
select = cartoon.find('div', attrs={'class' : 'selectcartoon'})
|
||||||
if select :
|
if select :
|
||||||
select.extract()
|
select.extract()
|
||||||
|
|
||||||
return cartoon
|
freshSoup = self.getFreshSoup(soup)
|
||||||
else :
|
freshSoup.body.append(cartoon)
|
||||||
# It is a TOC. Just return the whole lot.
|
|
||||||
return soup
|
return freshSoup
|
||||||
|
|
||||||
|
def getFreshSoup(self, oldSoup):
    """Return a new, empty HTML document that carries over the title of *oldSoup*.

    The new document has an empty <body>. Its <title> receives the text of
    the old page's title when the old page has one; otherwise it stays empty.
    """
    freshSoup = BeautifulSoup('<html><head><title></title></head><body></body></html>')
    # A page without a <head> yields None here; guard it so the title
    # lookup cannot raise AttributeError.
    oldHead = oldSoup.head
    if oldHead is not None and oldHead.title:
        freshSoup.head.title.append(self.tag_to_string(oldHead.title))
    return freshSoup
|
||||||
|
|
||||||
|
|
||||||
|
@ -15,7 +15,7 @@ class FTDe(BasicNewsRecipe):
|
|||||||
__author__ = 'Oliver Niesner'
|
__author__ = 'Oliver Niesner'
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
timefmt = ' [%d %b %Y]'
|
timefmt = ' [%d %b %Y]'
|
||||||
language = 'de'
|
language = _('German')
|
||||||
max_articles_per_feed = 40
|
max_articles_per_feed = 40
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
|
|
||||||
@ -28,8 +28,13 @@ class FTDe(BasicNewsRecipe):
|
|||||||
dict(id='ADS_Top'),
|
dict(id='ADS_Top'),
|
||||||
dict(id='spinner'),
|
dict(id='spinner'),
|
||||||
dict(id='ftd-contentad'),
|
dict(id='ftd-contentad'),
|
||||||
|
dict(id='ftd-promo'),
|
||||||
dict(id='nava-50009007-1-0'),
|
dict(id='nava-50009007-1-0'),
|
||||||
dict(id='navli-50009007-1-0'),
|
dict(id='navli-50009007-1-0'),
|
||||||
|
dict(id='Box5000534-0-0-0'),
|
||||||
|
dict(id='ExpV-1-0-0-1'),
|
||||||
|
dict(id='ExpV-1-0-0-0'),
|
||||||
|
dict(id='PollExpV-2-0-0-0'),
|
||||||
dict(id='starRating'),
|
dict(id='starRating'),
|
||||||
dict(id='saveRating'),
|
dict(id='saveRating'),
|
||||||
dict(id='yLayer'),
|
dict(id='yLayer'),
|
||||||
@ -44,14 +49,19 @@ class FTDe(BasicNewsRecipe):
|
|||||||
dict(name='ul', attrs={'class':'nav'}),
|
dict(name='ul', attrs={'class':'nav'}),
|
||||||
dict(name='p', attrs={'class':'articleOptionHead'}),
|
dict(name='p', attrs={'class':'articleOptionHead'}),
|
||||||
dict(name='p', attrs={'class':'articleOptionFoot'}),
|
dict(name='p', attrs={'class':'articleOptionFoot'}),
|
||||||
|
dict(name='p', attrs={'class':'moreInfo'}),
|
||||||
dict(name='div', attrs={'class':'chartBox'}),
|
dict(name='div', attrs={'class':'chartBox'}),
|
||||||
dict(name='div', attrs={'class':'ratingOpt starRatingContainer articleOptionFootFrame'}),
|
dict(name='div', attrs={'class':'ratingOpt starRatingContainer articleOptionFootFrame'}),
|
||||||
dict(name='div', attrs={'class':'box boxArticleBasic boxComments boxTransparent'}),
|
dict(name='div', attrs={'class':'box boxArticleBasic boxComments boxTransparent'}),
|
||||||
dict(name='div', attrs={'class':'box boxNavTabs'}),
|
dict(name='div', attrs={'class':'box boxNavTabs'}),
|
||||||
|
dict(name='div', attrs={'class':'boxMMRgtLow'}),
|
||||||
dict(name='span', attrs={'class':'vote_455857'}),
|
dict(name='span', attrs={'class':'vote_455857'}),
|
||||||
dict(name='div', attrs={'class':'relatedhalb'}),
|
dict(name='div', attrs={'class':'relatedhalb'}),
|
||||||
dict(name='div', attrs={'class':'box boxListScrollOutline'}),
|
dict(name='div', attrs={'class':'box boxListScrollOutline'}),
|
||||||
|
dict(name='div', attrs={'class':'box boxPhotoshow boxImgWide'}),
|
||||||
|
dict(name='div', attrs={'class':'box boxTeaser'}),
|
||||||
dict(name='div', attrs={'class':'tagCloud'}),
|
dict(name='div', attrs={'class':'tagCloud'}),
|
||||||
|
dict(name='div', attrs={'class':'pollView'}),
|
||||||
dict(name='div', attrs={'class':'box boxArticleBasic boxNavTabsOutline'}),
|
dict(name='div', attrs={'class':'box boxArticleBasic boxNavTabsOutline'}),
|
||||||
dict(name='div', attrs={'class':'ftdHpNav'}),
|
dict(name='div', attrs={'class':'ftdHpNav'}),
|
||||||
dict(name='div', attrs={'class':'ftdHead'}),
|
dict(name='div', attrs={'class':'ftdHead'}),
|
||||||
@ -67,9 +77,10 @@ class FTDe(BasicNewsRecipe):
|
|||||||
dict(name='div', attrs={'class':'wertungoben'}),
|
dict(name='div', attrs={'class':'wertungoben'}),
|
||||||
dict(name='div', attrs={'class':'artikelfuss'}),
|
dict(name='div', attrs={'class':'artikelfuss'}),
|
||||||
dict(name='a', attrs={'class':'rating'}),
|
dict(name='a', attrs={'class':'rating'}),
|
||||||
|
dict(name='a', attrs={'href':'#rt'}),
|
||||||
dict(name='div', attrs={'class':'articleOptionFootFrame'}),
|
dict(name='div', attrs={'class':'articleOptionFootFrame'}),
|
||||||
dict(name='div', attrs={'class':'artikelsplitfaq'})]
|
dict(name='div', attrs={'class':'artikelsplitfaq'})]
|
||||||
remove_tags_after = [dict(name='a', attrs={'class':'more'})]
|
#remove_tags_after = [dict(name='a', attrs={'class':'more'})]
|
||||||
|
|
||||||
feeds = [ ('Finanzen', 'http://www.ftd.de/rss2/finanzen/maerkte'),
|
feeds = [ ('Finanzen', 'http://www.ftd.de/rss2/finanzen/maerkte'),
|
||||||
('Meinungshungrige', 'http://www.ftd.de/rss2/meinungshungrige'),
|
('Meinungshungrige', 'http://www.ftd.de/rss2/meinungshungrige'),
|
||||||
@ -86,4 +97,4 @@ class FTDe(BasicNewsRecipe):
|
|||||||
|
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
return url + '?mode=print'
|
return url.replace('.html', '.html?mode=print')
|
||||||
|
38
resources/recipes/greader_uber.recipe
Normal file
38
resources/recipes/greader_uber.recipe
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
import urllib, re, mechanize
|
||||||
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
from calibre import __appname__
|
||||||
|
|
||||||
|
class GoogleReaderUber(BasicNewsRecipe):
    """Download the items of every tag listed by a Google Reader account.

    Requires the user's Google credentials (``needs_subscription``); the
    article bodies come straight from the Atom feeds.
    """

    title = 'Google Reader Uber'
    description = 'This recipe downloads all unread feedsfrom your Google Reader account.'
    needs_subscription = True
    __author__ = 'rollercoaster, davec'
    base_url = 'http://www.google.com/reader/atom/'
    oldest_article = 365
    max_articles_per_feed = 250
    # Query string appended to every feed URL: n is the item count.
    # NOTE(review): xt=.../state/com.google/read presumably excludes items
    # already marked as read -- confirm against the Reader API.
    get_options = '?n=%d&xt=user/-/state/com.google/read' % max_articles_per_feed
    use_embedded_content = True

    def get_browser(self):
        """Return a browser carrying the Google ``SID`` cookie when credentials are set.

        Without a username/password the stock recipe browser is returned.
        Raises ValueError when the ClientLogin response contains no SID.
        """
        # get_browser is called through the class, so the instance has to be
        # passed explicitly.
        br = BasicNewsRecipe.get_browser(self)

        if self.username is not None and self.password is not None:
            request = urllib.urlencode([('Email', self.username), ('Passwd', self.password),
                                        ('service', 'reader'), ('source', __appname__)])
            response = br.open('https://www.google.com/accounts/ClientLogin', request)
            match = re.search(r'SID=(\S*)', response.read())
            if match is None:
                raise ValueError('Google ClientLogin response did not contain a SID token')
            sid = match.group(1)

            # Replace the browser with a plain opener that only carries the
            # session cookie for .google.com.
            cookies = mechanize.CookieJar()
            br = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookies))
            cookies.set_cookie(mechanize.Cookie(None, 'SID', sid, None, False, '.google.com', True, True, '/', True, False, None, True, '', '', None))
        return br

    def get_feeds(self):
        """Return ``(title, url)`` pairs, one per id in the account's tag list.

        The title is the last path component of the id; 'broadcast' ids are
        rewritten to 'reading-list' before the URL is built.
        """
        feeds = []
        soup = self.index_to_soup('http://www.google.com/reader/api/0/tag/list')
        for tag in soup.findAll(True, attrs={'name':['id']}):
            url = tag.contents[0].replace('broadcast','reading-list')
            # Text after the last '/'; the whole id when it contains none.
            feed_title = url.rpartition('/')[2]
            feed_url = self.base_url + urllib.quote(url.encode('utf-8')) + self.get_options
            feeds.append((feed_title, feed_url))
        return feeds
|
91
resources/recipes/joop.recipe
Normal file
91
resources/recipes/joop.recipe
Normal file
@ -0,0 +1,91 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
from calibre.ebooks.BeautifulSoup import Tag
|
||||||
|
import re
|
||||||
|
|
||||||
|
class JoopRecipe(BasicNewsRecipe):
    """Recipe for joop.nl, indexed from the section link lists in the site footer."""

    __license__ = 'GPL v3'
    __author__ = 'kwetal'
    language = 'nl'
    country = 'NL'
    version = 1

    title = u'Joop'
    publisher = u'Vara'
    category = u'News, Politics, Discussion'
    description = u'Political blog from the Netherlands'

    oldest_article = 7
    max_articles_per_feed = 100
    use_embedded_content = False

    no_stylesheets = True
    remove_javascript = True

    # Author header, column head (holds the date) and the article body.
    keep_only_tags = [
        dict(name = 'div', attrs = {'class': 'author_head clearfix photo'}),
        dict(name = 'h2', attrs = {'class': 'columnhead smallline'}),
        dict(name = 'div', attrs = {'class': re.compile('article.*')}),
    ]

    # The joop_* classes are the ones assigned in preprocess_html().
    extra_css = '''
        body {font-family: verdana, arial, helvetica, geneva, sans-serif;}
        img {margin-right: 0.4em;}
        h3 {font-size: medium; font-style: italic; font-weight: normal;}
        h2 {font-size: xx-large; font-weight: bold}
        sub {color: #666666; font-size: x-small; font-weight: normal;}
        div.joop_byline {font-size: large}
        div.joop_byline_job {font-size: small; color: #696969;}
        div.joop_date {font-size: x-small; font-style: italic; margin-top: 0.6em}
        '''

    INDEX = 'http://www.joop.nl'

    conversion_options = {'comments': description, 'tags': category, 'language': language,
                          'publisher': publisher}

    def parse_index(self):
        """Build the section/article index from the footer of the front page.

        Returns a list of ``(section, articles)`` tuples, one per known
        section, where each article is a dict with title, date, url and
        description. Raises ValueError when the footer block is missing.
        """
        sections = ['Politiek', 'Wereld', 'Economie', 'Groen', 'Media', 'Leven', 'Show', 'Opinies']
        soup = self.index_to_soup(self.INDEX)
        answer = []

        footer = soup.find('div', attrs = {'id': 'footer'})
        if footer is None:
            # Without the footer there is nothing to index; say so instead of
            # failing later with an AttributeError on None.
            raise ValueError('Could not find the footer section index on ' + self.INDEX)

        for section in sections:
            articles = []
            h2 = footer.find(lambda tag: tag.name == 'h2' and tag.renderContents() == section)
            if h2:
                ul = h2.findNextSibling('ul', 'linklist')
                if ul:
                    for li in ul.findAll('li'):
                        link = li.a
                        # Skip list items that carry no usable link.
                        if link is None or not link.get('href'):
                            continue
                        title = self.tag_to_string(link)
                        url = self.INDEX + link['href']
                        articles.append({'title': title, 'date': None, 'url': url, 'description': ''})

            answer.append((section, articles))

        return answer

    def preprocess_html(self, soup):
        """Turn the author header and the date header into styled <div>s.

        The first <h2> replaces the author block as ``div.joop_byline`` (its
        <span> becomes ``div.joop_byline_job``), and the column head is
        replaced by a ``div.joop_date`` holding the date text, if any.
        """
        div = soup.find('div', 'author_head clearfix photo')
        if div:
            h2 = soup.find('h2')
            if h2:
                h2.name = 'div'
                h2['class'] = 'joop_byline'
                span = h2.find('span')
                if span:
                    span.name = 'div'
                    span['class'] = 'joop_byline_job'
                div.replaceWith(h2)

        h2 = soup.find('h2', attrs = {'class': 'columnhead smallline'})
        if h2:
            txt = None
            span = h2.find('span', 'info')
            if span:
                txt = span.find(text = True)
            div = Tag(soup, 'div', attrs = [('class', 'joop_date')])
            # Appending None to a tag fails, so an absent date leaves the
            # div empty.
            if txt is not None:
                div.append(txt)
            h2.replaceWith(div)

        return soup
|
||||||
|
|
||||||
|
|
44
resources/recipes/kitsapun.recipe
Normal file
44
resources/recipes/kitsapun.recipe
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
'''
|
||||||
|
www.kitsapsun.com
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class Kitsapsun(BasicNewsRecipe):
    """Kitsap Sun recipe: five section feeds from kitsapsun.com.

    Only the story meta and story content blocks of each page are kept, and
    ``print_version`` points every article at its ``/?print=1`` variant.
    """

    title = 'Kitsap Sun'
    __author__ = 'Darko Miletic'
    description = 'News from Kitsap County'
    publisher = 'Scripps Interactive Newspapers Group'
    category = 'news, Kitsap county, USA'
    language = 'en'
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'cp1252'
    use_embedded_content = False

    # Metadata handed to the conversion step, taken from the attributes above.
    conversion_options = dict(
        comments=description,
        tags=category,
        language=language,
        publisher=publisher,
    )

    keep_only_tags = [
        {'name': 'div', 'attrs': {'id': ['story_meta', 'story_content']}},
    ]

    remove_tags = [
        {'name': ['object', 'link', 'embed', 'form', 'iframe']},
    ]

    feeds = [
        (u'News', u'http://www.kitsapsun.com/rss/headlines/news/'),
        (u'Business', u'http://www.kitsapsun.com/rss/headlines/business/'),
        (u'Communities', u'http://www.kitsapsun.com/rss/headlines/communities/'),
        (u'Entertainment', u'http://www.kitsapsun.com/rss/headlines/entertainment/'),
        (u'Lifestyles', u'http://www.kitsapsun.com/rss/headlines/lifestyles/'),
    ]

    def print_version(self, url):
        """Return *url* with everything after its last '/' replaced by '/?print=1'."""
        head = url.rpartition('/')[0]
        return head + '/?print=1'
|
79
resources/recipes/ledevoir.recipe
Normal file
79
resources/recipes/ledevoir.recipe
Normal file
@ -0,0 +1,79 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__author__ = 'Lorenzo Vigentini'
|
||||||
|
__copyright__ = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
|
||||||
|
__version__ = 'v1.01'
|
||||||
|
__date__ = '14, January 2010'
|
||||||
|
__description__ = 'Canadian Paper '
|
||||||
|
|
||||||
|
'''
|
||||||
|
http://www.ledevoir.com/
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class ledevoir(BasicNewsRecipe):
    """Recipe for the newspaper Le Devoir (ledevoir.com), built from its RSS feeds."""

    author = 'Lorenzo Vigentini'
    # The other recipes in this commit credit their author through the class
    # attribute `__author__`; mirror `author` there so the credit is picked up.
    __author__ = author
    description = 'Canadian Paper'

    cover_url = 'http://www.ledevoir.com/images/ul/graphiques/logo_devoir.gif'
    title = u'Le Devoir'
    publisher = 'leDevoir.com'
    category = 'News, finance, economy, politics'

    language = 'fr'
    encoding = 'utf-8'
    timefmt = '[%a, %d %b, %Y]'

    max_articles_per_feed = 50
    use_embedded_content = False
    # NOTE(review): the sibling recipes do not set this attribute; confirm it
    # is an option the base class actually reads before relying on it.
    recursion = 10

    remove_javascript = True
    no_stylesheets = True

    # Article body plus the breadcrumb list.
    keep_only_tags = [
        dict(name='div', attrs={'id':'article'}),
        dict(name='ul', attrs={'id':'ariane'})
    ]

    remove_tags = [
        dict(name='div', attrs={'id':'dialog'}),
        dict(name='div', attrs={'class':['interesse_actions','reactions']}),
        dict(name='ul', attrs={'class':'mots_cles'}),
        dict(name='a', attrs={'class':'haut'}),
        dict(name='h5', attrs={'class':'interesse_actions'})
    ]

    feeds = [
        (u'A la une', 'http://www.ledevoir.com/rss/manchettes.xml'),
        (u'Edition complete', 'http://feeds2.feedburner.com/fluxdudevoir'),
        (u'Opinions', 'http://www.ledevoir.com/rss/opinions.xml'),
        (u'Chroniques', 'http://www.ledevoir.com/rss/chroniques.xml'),
        (u'Politique', 'http://www.ledevoir.com/rss/section/politique.xml?id=51'),
        (u'International', 'http://www.ledevoir.com/rss/section/international.xml?id=76'),
        (u'Culture', 'http://www.ledevoir.com/rss/section/culture.xml?id=48'),
        (u'Environnement', 'http://www.ledevoir.com/rss/section/environnement.xml?id=78'),
        (u'Societe', 'http://www.ledevoir.com/rss/section/societe.xml?id=52'),
        (u'Economie', 'http://www.ledevoir.com/rss/section/economie.xml?id=49'),
        (u'Sports', 'http://www.ledevoir.com/rss/section/sports.xml?id=85'),
        (u'Loisirs', 'http://www.ledevoir.com/rss/section/loisirs.xml?id=50')
    ]

    # Styling for headings, byline/date specs, breadcrumb, credits and text.
    extra_css = '''
        h1 {color:#1C1E7C;font-family:Times,Georgia,serif;font-size:1.85em;font-size-adjust:none;font-stretch:normal;font-style:normal;font-variant:normal;font-weight:bold;line-height:1.2em;margin:0 0 5px;}
        h2 {color:#333333;font-family:Times,Georgia,serif;font-size:1.5em;font-size-adjust:none;font-stretch:normal;font-style:normal;font-variant:normal;font-weight:normal;line-height:1.2em;margin:0 0 5px;}
        h3 {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:15px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px;}
        h4 {color:#333333; font-family:Arial,Helvetica,sans-serif;font-size:13px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; }
        h5 {color:#333333; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; text-transform:uppercase;}
        .specs {line-height:1em;margin:1px 0;}
        .specs span.auteur {font:0.85em/1.1em Arial, Verdana, sans-serif;color:#787878;}
        .specs span.auteur a,
        .specs span.auteur span {text-transform:uppercase;color:#787878;}
        .specs .date {font:0.85em/1.1em Arial, Verdana, sans-serif;color:#787878;}
        ul#ariane {list-style-type:none;margin:0;padding:5px 0 8px 0;font:0.85em/1.2em Arial, Verdana, sans-serif;color:#2E2E2E;border-bottom:10px solid #fff;}
        ul#ariane li {display:inline;}
        ul#ariane a {color:#2E2E2E;text-decoration:underline;}
        .credit {color:#787878;font-size:0.71em;line-height:1.1em;font-weight:bold;}
        .texte {font-size:1.15em;line-height:1.4em;margin-bottom:17px;}
        '''
|
@ -70,11 +70,28 @@ class NYTimes(BasicNewsRecipe):
|
|||||||
feeds.append((current_section, current_articles))
|
feeds.append((current_section, current_articles))
|
||||||
|
|
||||||
return feeds
|
return feeds
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
story = soup.find(name='div', attrs={'class':'triline'})
|
story = soup.find(name='div', attrs={'class':'triline'})
|
||||||
#td = heading.findParent(name='td')
|
page2_link = soup.find('p','pagenav')
|
||||||
#td.extract()
|
if page2_link:
|
||||||
|
atag = page2_link.find('a',href=True)
|
||||||
|
if atag:
|
||||||
|
page2_url = atag['href']
|
||||||
|
if page2_url.startswith('story'):
|
||||||
|
page2_url = 'http://www.nationalpost.com/todays-paper/'+page2_url
|
||||||
|
elif page2_url.startswith( '/todays-paper/story.html'):
|
||||||
|
page2_url = 'http://www.nationalpost.com/'+page2_url
|
||||||
|
page2_soup = self.index_to_soup(page2_url)
|
||||||
|
if page2_soup:
|
||||||
|
page2_content = page2_soup.find('div','story-content')
|
||||||
|
if page2_content:
|
||||||
|
full_story = BeautifulSoup('<div></div>')
|
||||||
|
full_story.insert(0,story)
|
||||||
|
full_story.insert(1,page2_content)
|
||||||
|
story = full_story
|
||||||
soup = BeautifulSoup('<html><head><title>t</title></head><body></body></html>')
|
soup = BeautifulSoup('<html><head><title>t</title></head><body></body></html>')
|
||||||
body = soup.find(name='body')
|
body = soup.find(name='body')
|
||||||
body.insert(0, story)
|
body.insert(0, story)
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
|
@ -1,29 +1,38 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
||||||
|
|
||||||
class NrcNextRecipe(BasicNewsRecipe):
|
class NrcNextRecipe(BasicNewsRecipe):
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__author__ = 'kwetal'
|
__author__ = 'kwetal'
|
||||||
version = 1
|
|
||||||
language = 'nl'
|
language = 'nl'
|
||||||
description = u'Dutch newsblog from the Dutch daily newspaper nrcnext.'
|
country = 'NL'
|
||||||
|
version = 2
|
||||||
|
|
||||||
title = u'nrcnext'
|
title = u'nrcnext'
|
||||||
|
publisher = u'NRC Media'
|
||||||
|
category = u'News, Opinion, the Netherlands'
|
||||||
|
description = u'Dutch newsblog from the Dutch daily newspaper nrcnext.'
|
||||||
|
|
||||||
|
conversion_options = {'comments': description, 'language': language, 'publisher': publisher}
|
||||||
|
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
template_css = ''
|
remove_javascript = True
|
||||||
|
|
||||||
# I want to do some special processing on the articles. I could not solve it with the 'extra_css' property . So we do it the hard way.
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'id' : 'main'})]
|
keep_only_tags = [dict(name='div', attrs={'id' : 'main'})]
|
||||||
# If that's overkill for you comment out the previous line and uncomment the next. Then get rid of the preprocess_html() method.
|
|
||||||
#keep_only_tags = [dict(name='div', attrs={'class' : 'post'}), dict(name='div', attrs={'class' : 'vlag'}) ]
|
|
||||||
|
|
||||||
remove_tags = [dict(name = 'div', attrs = {'class' : 'meta'}),
|
remove_tags = []
|
||||||
dict(name = 'div', attrs = {'class' : 'datumlabel'}),
|
remove_tags.append(dict(name = 'div', attrs = {'class' : 'meta'}))
|
||||||
dict(name = 'ul', attrs = {'class' : 'cats single'}),
|
remove_tags.append(dict(name = 'div', attrs = {'class' : 'datumlabel'}))
|
||||||
dict(name = 'ul', attrs = {'class' : 'cats onderwerpen'}),
|
remove_tags.append(dict(name = 'ul', attrs = {'class' : 'cats single'}))
|
||||||
dict(name = 'ul', attrs = {'class' : 'cats rubrieken'})]
|
remove_tags.append(dict(name = 'ul', attrs = {'class' : 'cats onderwerpen'}))
|
||||||
|
remove_tags.append(dict(name = 'ul', attrs = {'class' : 'cats rubrieken'}))
|
||||||
|
|
||||||
use_embedded_content = False
|
extra_css = '''
|
||||||
|
body {font-family: verdana, arial, helvetica, geneva, sans-serif; text-align: left;}
|
||||||
|
p.wp-caption-text {font-size: x-small; color: #666666;}
|
||||||
|
h2.sub_title {font-size: medium; color: #696969;}
|
||||||
|
h2.vlag {font-size: small; font-weight: bold;}
|
||||||
|
'''
|
||||||
|
|
||||||
def parse_index(self) :
|
def parse_index(self) :
|
||||||
# Use the website as an index. Their RSS feeds can be out of date.
|
# Use the website as an index. Their RSS feeds can be out of date.
|
||||||
@ -44,10 +53,11 @@ class NrcNextRecipe(BasicNewsRecipe):
|
|||||||
# Find the links to the actual articles and remember the location they're pointing to and the title
|
# Find the links to the actual articles and remember the location they're pointing to and the title
|
||||||
a = post.find('a', attrs={'rel' : 'bookmark'})
|
a = post.find('a', attrs={'rel' : 'bookmark'})
|
||||||
href = a['href']
|
href = a['href']
|
||||||
title = a.renderContents()
|
title = self.tag_to_string(a)
|
||||||
|
|
||||||
if index == 'columnisten' :
|
if index == 'columnisten' :
|
||||||
# In this feed/page articles can be written by more than one author. It is nice to see their names in the titles.
|
# In this feed/page articles can be written by more than one author.
|
||||||
|
# It is nice to see their names in the titles.
|
||||||
flag = post.find('h2', attrs = {'class' : 'vlag'})
|
flag = post.find('h2', attrs = {'class' : 'vlag'})
|
||||||
author = flag.contents[0].renderContents()
|
author = flag.contents[0].renderContents()
|
||||||
completeTitle = u''.join([author, u': ', title])
|
completeTitle = u''.join([author, u': ', title])
|
||||||
@ -71,44 +81,46 @@ class NrcNextRecipe(BasicNewsRecipe):
|
|||||||
return answer
|
return answer
|
||||||
|
|
||||||
def preprocess_html(self, soup) :
|
def preprocess_html(self, soup) :
|
||||||
# This method is called for every page, be it cartoon or TOC. We need to process each in their own way
|
|
||||||
if soup.find('div', attrs = {'id' : 'main', 'class' : 'single'}):
|
if soup.find('div', attrs = {'id' : 'main', 'class' : 'single'}):
|
||||||
# It's an article, find the interesting part
|
|
||||||
tag = soup.find('div', attrs = {'class' : 'post'})
|
tag = soup.find('div', attrs = {'class' : 'post'})
|
||||||
if tag:
|
if tag:
|
||||||
# And replace any links with their text, so they don't show up underlined on my reader.
|
h2 = tag.find('h2', 'vlag')
|
||||||
for link in tag.findAll('a') :
|
if h2:
|
||||||
link.replaceWith(link.renderContents())
|
new_h2 = Tag(soup, 'h2', attrs = [('class', 'vlag')])
|
||||||
|
new_h2.append(self.tag_to_string(h2))
|
||||||
|
h2.replaceWith(new_h2)
|
||||||
|
else:
|
||||||
|
h2 = tag.find('h2')
|
||||||
|
if h2:
|
||||||
|
new_h2 = Tag(soup, 'h2', attrs = [('class', 'sub_title')])
|
||||||
|
new_h2.append(self.tag_to_string(h2))
|
||||||
|
h2.replaceWith(new_h2)
|
||||||
|
|
||||||
# Slows down my Sony reader; feel free to comment out
|
h1 = tag.find('h1')
|
||||||
|
if h1:
|
||||||
|
new_h1 = Tag(soup, 'h1')
|
||||||
|
new_h1.append(self.tag_to_string(h1))
|
||||||
|
h1.replaceWith(new_h1)
|
||||||
|
|
||||||
|
# Slows down my reader.
|
||||||
for movie in tag.findAll('span', attrs = {'class' : 'vvqbox vvqvimeo'}):
|
for movie in tag.findAll('span', attrs = {'class' : 'vvqbox vvqvimeo'}):
|
||||||
movie.extract()
|
movie.extract()
|
||||||
for movie in tag.findAll('span', attrs = {'class' : 'vvqbox vvqyoutube'}):
|
for movie in tag.findAll('span', attrs = {'class' : 'vvqbox vvqyoutube'}):
|
||||||
movie.extract()
|
movie.extract()
|
||||||
|
for iframe in tag.findAll('iframe') :
|
||||||
|
iframe.extract()
|
||||||
|
|
||||||
homeMadeSoup = BeautifulSoup('<html><head></head><body></body></html>')
|
fresh_soup = self.getFreshSoup(soup)
|
||||||
body = homeMadeSoup.find('body')
|
fresh_soup.body.append(tag)
|
||||||
body.append(tag)
|
|
||||||
|
|
||||||
return homeMadeSoup
|
return fresh_soup
|
||||||
else:
|
else:
|
||||||
# This should never happen and other famous last words...
|
# This should never happen and other famous last words...
|
||||||
return soup
|
return soup
|
||||||
else :
|
|
||||||
# It's a TOC, return the whole lot.
|
|
||||||
return soup
|
|
||||||
|
|
||||||
def postproces_html(self, soup) :
|
|
||||||
# Should not happen, but it does. Slows down my Sony eReader
|
|
||||||
for img in soup.findAll('img') :
|
|
||||||
if img['src'].startswith('http://') :
|
|
||||||
img.extract()
|
|
||||||
|
|
||||||
# Happens for some movies which we are not able to view anyway
|
|
||||||
for iframe in soup.findAll('iframe') :
|
|
||||||
if iframe['src'].startswith('http://') :
|
|
||||||
iframe.extract()
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def getFreshSoup(self, oldSoup):
|
||||||
|
freshSoup = BeautifulSoup('<html><head><title></title></head><body></body></html>')
|
||||||
|
if oldSoup.head.title:
|
||||||
|
freshSoup.head.title.append(self.tag_to_string(oldSoup.head.title))
|
||||||
|
return freshSoup
|
||||||
|
|
||||||
|
125
resources/recipes/yementimes.recipe
Normal file
125
resources/recipes/yementimes.recipe
Normal file
@ -0,0 +1,125 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
||||||
|
|
||||||
|
class YemenTimesRecipe(BasicNewsRecipe):
    '''
    Recipe for the English-language weekly Yemen Times (yementimes.com).

    The section pages listed in ``feeds`` are scraped for article links by
    :meth:`parse_index`; every downloaded article is then rebuilt by
    :meth:`preprocess_html` into a minimal document that holds only the
    headline, lead image, date, byline and story.
    '''
    __license__ = 'GPL v3'
    __author__ = 'kwetal'
    language = 'en_YE'
    country = 'YE'
    version = 1

    title = u'Yemen Times'
    publisher = u'yementimes.com'
    category = u'News, Opinion, Yemen'
    description = u'Award winning weekly from Yemen, promoting press freedom, professional journalism and the defense of human rights.'

    oldest_article = 7
    max_articles_per_feed = 100
    use_embedded_content = False
    encoding = 'utf-8'

    remove_empty_feeds = True
    no_stylesheets = True
    remove_javascript = True

    keep_only_tags = []
    keep_only_tags.append(dict(name = 'div', attrs = {'id': 'ctl00_ContentPlaceHolder1_MAINNEWS0_Panel1',
                                                      'class': 'DMAIN2'}))
    remove_attributes = ['style']

    # Base URL; the relative links found on the section pages are appended to it.
    INDEX = 'http://www.yementimes.com/'

    # (section title, section page URL) pairs. These are HTML pages that
    # parse_index() scrapes, not RSS feeds.
    feeds = []
    feeds.append((u'Our Viewpoint', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=6&pnm=OUR%20VIEWPOINT'))
    feeds.append((u'Local News', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=3&pnm=Local%20news'))
    feeds.append((u'Their News', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=80&pnm=Their%20News'))
    feeds.append((u'Report', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=8&pnm=report'))
    feeds.append((u'Health', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=51&pnm=health'))
    feeds.append((u'Interview', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=77&pnm=interview'))
    feeds.append((u'Opinion', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=7&pnm=opinion'))
    feeds.append((u'Business', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=5&pnm=business'))
    feeds.append((u'Op-Ed', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=81&pnm=Op-Ed'))
    feeds.append((u'Culture', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=75&pnm=Culture'))
    feeds.append((u'Readers View', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=4&pnm=Readers%20View'))
    feeds.append((u'Variety', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=9&pnm=Variety'))
    feeds.append((u'Education', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=57&pnm=Education'))

    extra_css = '''
                body {font-family:verdana, arial, helvetica, geneva, sans-serif;}
                div.yemen_byline {font-size: medium; font-weight: bold;}
                div.yemen_date {font-size: small; color: #666666; margin-bottom: 0.6em;}
                .yemen_caption {font-size: x-small; font-style: italic; color: #696969;}
                '''

    conversion_options = {'comments': description, 'tags': category, 'language': 'en',
                          'publisher': publisher, 'linearize_tables': True}

    def get_browser(self):
        '''
        Return the stock recipe browser with gzip handling switched on.
        '''
        # The base implementation is an instance method, so ``self`` has to
        # be passed explicitly when it is called through the class.
        br = BasicNewsRecipe.get_browser(self)
        br.set_handle_gzip(True)

        return br

    def parse_index(self):
        '''
        Build the article index by scraping every section page in ``feeds``.

        Returns a list of ``(section title, articles)`` tuples, where each
        article is a dict with ``title``, ``date``, ``url`` and
        ``description`` keys. Sections whose page lacks the expected
        ``div.newsbox`` + sibling ``table`` structure are skipped.
        '''
        answer = []
        for feed_title, feed in self.feeds:
            soup = self.index_to_soup(feed)

            newsbox = soup.find('div', 'newsbox')
            main = None
            if newsbox is not None:
                main = newsbox.findNextSibling('table')
            if main is None:
                # Layout changed or the page is empty: skip this section
                # instead of aborting the whole download.
                self.log('Yemen Times: no article list found for section', feed_title)
                continue

            articles = []
            for li in main.findAll('li'):
                link = li.a
                if link is None or not link.get('href'):
                    # List item without a usable link; nothing to fetch.
                    continue
                title = self.tag_to_string(link)
                url = self.INDEX + link['href']
                articles.append({'title': title, 'date': None, 'url': url, 'description': '<br/>&nbsp;'})

            answer.append((feed_title, articles))

        return answer

    def preprocess_html(self, soup):
        '''
        Rebuild the article as a fresh document.

        Elements are moved out of ``soup`` into the new document in this
        order: headline (``div#DVMTIT``, renamed to ``h1``) and the first
        image of the following ``div#DVTOP``; publication date and byline
        taken from ``div#DVTIT``; the story body (``div#DVDET``). Any piece
        that is missing from the page is simply left out.
        '''
        freshSoup = self.getFreshSoup(soup)

        headline = soup.find('div', attrs = {'id': 'DVMTIT'})
        if headline:
            # Look the lead image up before the headline is moved out of
            # the old tree.
            div = headline.findNext('div', attrs = {'id': 'DVTOP'})
            img = None
            if div:
                img = div.find('img')

            headline.name = 'h1'
            freshSoup.body.append(headline)
            if img is not None:
                freshSoup.body.append(img)

        byline = soup.find('div', attrs = {'id': 'DVTIT'})
        if byline:
            # Must be bound even when the byline carries no <span>, since it
            # is tested below.
            date = None
            date_el = byline.find('span')
            if date_el:
                pub_date = self.tag_to_string(date_el)
                date = Tag(soup, 'div', attrs = [('class', 'yemen_date')])
                date.append(pub_date)
                # Take the date out so it is not repeated in the byline text.
                date_el.extract()

            raw = '<br/>'.join(['%s' % (part) for part in byline.findAll(text = True)])
            author = BeautifulSoup('<div class="yemen_byline">' + raw + '</div>')

            if date is not None:
                freshSoup.body.append(date)
            freshSoup.body.append(author)

        story = soup.find('div', attrs = {'id': 'DVDET'})
        if story:
            # Tables that wrap an image get the caption styling from extra_css.
            for table in story.findAll('table'):
                if table.find('img'):
                    table['class'] = 'yemen_caption'

            freshSoup.body.append(story)

        return freshSoup

    def getFreshSoup(self, oldSoup):
        '''
        Return an empty html/head/title/body document whose title text is
        copied from ``oldSoup`` when that document has one.
        '''
        freshSoup = BeautifulSoup('<html><head><title></title></head><body></body></html>')
        # Pages without a <head> would otherwise fail on ``None.title``.
        old_head = oldSoup.head
        if old_head is not None and old_head.title:
            freshSoup.head.title.append(self.tag_to_string(old_head.title))
        return freshSoup
|
@ -2,11 +2,12 @@ from __future__ import with_statement
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
|
|
||||||
import os, sys, tempfile, zipfile
|
import atexit, os, shutil, sys, tempfile, zipfile
|
||||||
|
|
||||||
from calibre.constants import numeric_version
|
from calibre.constants import numeric_version
|
||||||
from calibre.ptempfile import PersistentTemporaryFile
|
from calibre.ptempfile import PersistentTemporaryFile
|
||||||
|
|
||||||
|
|
||||||
class Plugin(object):
|
class Plugin(object):
|
||||||
'''
|
'''
|
||||||
A calibre plugin. Useful members include:
|
A calibre plugin. Useful members include:
|
||||||
@ -231,6 +232,8 @@ class CatalogPlugin(Plugin):
|
|||||||
A plugin that implements a catalog generator.
|
A plugin that implements a catalog generator.
|
||||||
'''
|
'''
|
||||||
|
|
||||||
|
resources_path = None
|
||||||
|
|
||||||
#: Output file type for which this plugin should be run
|
#: Output file type for which this plugin should be run
|
||||||
#: For example: 'epub' or 'xml'
|
#: For example: 'epub' or 'xml'
|
||||||
file_types = set([])
|
file_types = set([])
|
||||||
@ -249,22 +252,18 @@ class CatalogPlugin(Plugin):
|
|||||||
|
|
||||||
cli_options = []
|
cli_options = []
|
||||||
|
|
||||||
def cleanup(self, path):
|
|
||||||
try:
|
|
||||||
import os, shutil
|
|
||||||
if os.path.exists(path):
|
|
||||||
shutil.rmtree(path)
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
|
|
||||||
def search_sort_db(self, db, opts):
|
def search_sort_db(self, db, opts):
|
||||||
if opts.search_text:
|
|
||||||
|
# If declared, --ids overrides any declared search criteria
|
||||||
|
if not opts.ids and opts.search_text:
|
||||||
db.search(opts.search_text)
|
db.search(opts.search_text)
|
||||||
|
|
||||||
if opts.sort_by:
|
if opts.sort_by:
|
||||||
# 2nd arg = ascending
|
# 2nd arg = ascending
|
||||||
db.sort(opts.sort_by, True)
|
db.sort(opts.sort_by, True)
|
||||||
|
|
||||||
return db.get_data_as_dict()
|
return db.get_data_as_dict(ids=opts.ids)
|
||||||
|
|
||||||
def get_output_fields(self, opts):
|
def get_output_fields(self, opts):
|
||||||
# Return a list of requested fields, with opts.sort_by first
|
# Return a list of requested fields, with opts.sort_by first
|
||||||
@ -280,7 +279,9 @@ class CatalogPlugin(Plugin):
|
|||||||
fields = list(all_fields & requested_fields)
|
fields = list(all_fields & requested_fields)
|
||||||
else:
|
else:
|
||||||
fields = list(all_fields)
|
fields = list(all_fields)
|
||||||
|
|
||||||
fields.sort()
|
fields.sort()
|
||||||
|
if opts.sort_by:
|
||||||
fields.insert(0,fields.pop(int(fields.index(opts.sort_by))))
|
fields.insert(0,fields.pop(int(fields.index(opts.sort_by))))
|
||||||
return fields
|
return fields
|
||||||
|
|
||||||
@ -291,35 +292,27 @@ class CatalogPlugin(Plugin):
|
|||||||
Tab will be dynamically generated and added to the Catalog Options dialog in
|
Tab will be dynamically generated and added to the Catalog Options dialog in
|
||||||
calibre.gui2.dialogs.catalog.py:Catalog
|
calibre.gui2.dialogs.catalog.py:Catalog
|
||||||
'''
|
'''
|
||||||
import atexit
|
|
||||||
from calibre.customize.builtins import plugins as builtin_plugins
|
from calibre.customize.builtins import plugins as builtin_plugins
|
||||||
|
from calibre.customize.ui import config
|
||||||
|
from calibre.ptempfile import PersistentTemporaryDirectory
|
||||||
|
|
||||||
if type(self) in builtin_plugins:
|
if not type(self) in builtin_plugins and \
|
||||||
print "%s: Built-in Catalog plugin, no init necessary" % self.name
|
not self.name in config['disabled_plugins']:
|
||||||
else:
|
|
||||||
print "%s: User-added plugin" % self.name
|
|
||||||
print " Copying .ui and .py resources from %s to tmpdir" % self.plugin_path
|
|
||||||
|
|
||||||
# Generate a list of resource files to extract from the zipped plugin
|
|
||||||
# Copy to tmpdir/calibre_plugin_resources
|
|
||||||
files_to_copy = ["%s.%s" % (self.name.lower(),ext) for ext in ["ui","py"]]
|
files_to_copy = ["%s.%s" % (self.name.lower(),ext) for ext in ["ui","py"]]
|
||||||
print " files_to_copy: %s" % files_to_copy
|
|
||||||
resources = zipfile.ZipFile(self.plugin_path,'r')
|
resources = zipfile.ZipFile(self.plugin_path,'r')
|
||||||
temp_resources_path = os.path.join(tempfile.gettempdir(),'calibre_plugin_resources')
|
|
||||||
|
if self.resources_path is None:
|
||||||
|
self.resources_path = PersistentTemporaryDirectory('_plugin_resources', prefix='')
|
||||||
|
|
||||||
for file in files_to_copy:
|
for file in files_to_copy:
|
||||||
try:
|
try:
|
||||||
resources.extract(file, temp_resources_path)
|
resources.extract(file, self.resources_path)
|
||||||
print " %s extracted to %s" % (file, temp_resources_path)
|
|
||||||
except:
|
except:
|
||||||
print " %s not found in %s" % (file, os.path.basename(self.plugin_path))
|
print " customize:__init__.initialize(): %s not found in %s" % (file, os.path.basename(self.plugin_path))
|
||||||
|
continue
|
||||||
resources.close()
|
resources.close()
|
||||||
|
|
||||||
# Register temp_resources_path for deletion when calibre exits
|
def run(self, path_to_output, opts, db, ids):
|
||||||
atexit.register(self.cleanup, temp_resources_path)
|
|
||||||
|
|
||||||
|
|
||||||
def run(self, path_to_output, opts, db):
|
|
||||||
'''
|
'''
|
||||||
Run the plugin. Must be implemented in subclasses.
|
Run the plugin. Must be implemented in subclasses.
|
||||||
It should generate the catalog in the format specified
|
It should generate the catalog in the format specified
|
||||||
|
@ -14,6 +14,7 @@ Windows PNP strings:
|
|||||||
2W00000&1', 3, u'G:\\')
|
2W00000&1', 3, u'G:\\')
|
||||||
|
|
||||||
'''
|
'''
|
||||||
|
import re
|
||||||
|
|
||||||
from calibre.devices.usbms.driver import USBMS
|
from calibre.devices.usbms.driver import USBMS
|
||||||
|
|
||||||
@ -108,6 +109,7 @@ class POCKETBOOK360(EB600):
|
|||||||
|
|
||||||
OSX_MAIN_MEM = 'Philips Mass Storge Media'
|
OSX_MAIN_MEM = 'Philips Mass Storge Media'
|
||||||
OSX_CARD_A_MEM = 'Philips Mass Storge Media'
|
OSX_CARD_A_MEM = 'Philips Mass Storge Media'
|
||||||
|
OSX_MAIN_MEM_VOL_PAT = re.compile(r'/Pocket')
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def can_handle(cls, dev, debug=False):
|
def can_handle(cls, dev, debug=False):
|
||||||
|
@ -128,6 +128,10 @@ def do_set_metadata(opts, mi, stream, stream_type):
|
|||||||
mi.title_sort = title_sort(opts.title)
|
mi.title_sort = title_sort(opts.title)
|
||||||
if getattr(opts, 'tags', None) is not None:
|
if getattr(opts, 'tags', None) is not None:
|
||||||
mi.tags = [t.strip() for t in opts.tags.split(',')]
|
mi.tags = [t.strip() for t in opts.tags.split(',')]
|
||||||
|
if getattr(opts, 'series', None) is not None:
|
||||||
|
mi.series = opts.series.strip()
|
||||||
|
if getattr(opts, 'series_index', None) is not None:
|
||||||
|
mi.series_index = float(opts.series_index.strip())
|
||||||
|
|
||||||
if getattr(opts, 'cover', None) is not None:
|
if getattr(opts, 'cover', None) is not None:
|
||||||
ext = os.path.splitext(opts.cover)[1].replace('.', '').upper()
|
ext = os.path.splitext(opts.cover)[1].replace('.', '').upper()
|
||||||
|
@ -25,12 +25,14 @@ def get_document_info(stream):
|
|||||||
while not found:
|
while not found:
|
||||||
prefix = block[-6:]
|
prefix = block[-6:]
|
||||||
block = prefix + stream.read(block_size)
|
block = prefix + stream.read(block_size)
|
||||||
|
actual_block_size = len(block) - len(prefix)
|
||||||
if len(block) == len(prefix):
|
if len(block) == len(prefix):
|
||||||
break
|
break
|
||||||
idx = block.find(r'{\info')
|
idx = block.find(r'{\info')
|
||||||
if idx >= 0:
|
if idx >= 0:
|
||||||
found = True
|
found = True
|
||||||
stream.seek(stream.tell() - block_size + idx - len(prefix))
|
pos = stream.tell() - actual_block_size + idx - len(prefix)
|
||||||
|
stream.seek(pos)
|
||||||
else:
|
else:
|
||||||
if block.find(r'\sect') > -1:
|
if block.find(r'\sect') > -1:
|
||||||
break
|
break
|
||||||
|
@ -90,7 +90,10 @@ class DetectStructure(object):
|
|||||||
mark = etree.Element(XHTML('div'), style=page_break_after)
|
mark = etree.Element(XHTML('div'), style=page_break_after)
|
||||||
else: # chapter_mark == 'both':
|
else: # chapter_mark == 'both':
|
||||||
mark = etree.Element(XHTML('hr'), style=page_break_before)
|
mark = etree.Element(XHTML('hr'), style=page_break_before)
|
||||||
|
try:
|
||||||
elem.addprevious(mark)
|
elem.addprevious(mark)
|
||||||
|
except TypeError:
|
||||||
|
self.log.exception('Failed to mark chapter')
|
||||||
|
|
||||||
def create_level_based_toc(self):
|
def create_level_based_toc(self):
|
||||||
if self.opts.level1_toc is None:
|
if self.opts.level1_toc is None:
|
||||||
|
@ -20,6 +20,10 @@ class Font(object):
|
|||||||
|
|
||||||
class Column(object):
|
class Column(object):
|
||||||
|
|
||||||
|
# A column contains an element if the element bulges out to
|
||||||
|
# the left or the right by at most HFUZZ*col width.
|
||||||
|
HFUZZ = 0.2
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.left = self.right = self.top = self.bottom = 0
|
self.left = self.right = self.top = self.bottom = 0
|
||||||
self.width = self.height = 0
|
self.width = self.height = 0
|
||||||
@ -41,6 +45,10 @@ class Column(object):
|
|||||||
for x in self.elements:
|
for x in self.elements:
|
||||||
yield x
|
yield x
|
||||||
|
|
||||||
|
def contains(self, elem):
|
||||||
|
return elem.left > self.left - self.HFUZZ*self.width and \
|
||||||
|
elem.right < self.right + self.HFUZZ*self.width
|
||||||
|
|
||||||
class Element(object):
|
class Element(object):
|
||||||
|
|
||||||
def __eq__(self, other):
|
def __eq__(self, other):
|
||||||
@ -238,11 +246,10 @@ class Page(object):
|
|||||||
return columns
|
return columns
|
||||||
|
|
||||||
def find_elements_in_row_of(self, x):
|
def find_elements_in_row_of(self, x):
|
||||||
interval = Interval(x.top - self.YFUZZ * self.average_text_height,
|
interval = Interval(x.top,
|
||||||
x.top + self.YFUZZ*(1+self.average_text_height))
|
x.top + self.YFUZZ*(1+self.average_text_height))
|
||||||
h_interval = Interval(x.left, x.right)
|
h_interval = Interval(x.left, x.right)
|
||||||
m = max(0, x.idx-15)
|
for y in self.elements[x.idx:x.idx+15]:
|
||||||
for y in self.elements[m:x.idx+15]:
|
|
||||||
if y is not x:
|
if y is not x:
|
||||||
y_interval = Interval(y.top, y.bottom)
|
y_interval = Interval(y.top, y.bottom)
|
||||||
x_interval = Interval(y.left, y.right)
|
x_interval = Interval(y.left, y.right)
|
||||||
|
@ -169,6 +169,21 @@ class RTFInput(InputFormatPlugin):
|
|||||||
with open('styles.css', 'ab') as f:
|
with open('styles.css', 'ab') as f:
|
||||||
f.write(css)
|
f.write(css)
|
||||||
|
|
||||||
|
def preprocess(self, fname):
    '''
    Rewrite the RTF file so its unicode sequences suit the later conversion.

    The file is tokenized, rebuilt from the tokens and written to
    ``preprocessed.rtf`` in the current directory.

    :param fname: path of the RTF file to read.
    :return: ``'preprocessed.rtf'`` on success; ``fname`` unchanged if any
             step fails (the failure is logged and otherwise ignored).
    '''
    self.log('\tPreprocessing to convert unicode characters')
    try:
        # Close the input promptly instead of leaving it to the garbage
        # collector.
        with open(fname, 'rb') as src:
            data = src.read()
        from calibre.ebooks.rtf.preprocess import RtfTokenizer, RtfTokenParser
        tokenizer = RtfTokenizer(data)
        tokens = RtfTokenParser(tokenizer.tokens)
        data = tokens.toRTF()
        out_name = 'preprocessed.rtf'
        with open(out_name, 'wb') as f:
            f.write(data)
        # Switch to the new file only once it has been written completely,
        # so a failed write still falls back to the original input.
        fname = out_name
    except:
        # Deliberately a bare except: this step is best-effort and the
        # tokenizer/parser signal errors by raising BaseException directly.
        self.log.exception(
            'Failed to preprocess RTF to convert unicode sequences, ignoring...')
    return fname
|
||||||
|
|
||||||
def convert(self, stream, options, file_ext, log,
|
def convert(self, stream, options, file_ext, log,
|
||||||
accelerators):
|
accelerators):
|
||||||
@ -177,8 +192,9 @@ class RTFInput(InputFormatPlugin):
|
|||||||
from calibre.ebooks.rtf2xml.ParseRtf import RtfInvalidCodeException
|
from calibre.ebooks.rtf2xml.ParseRtf import RtfInvalidCodeException
|
||||||
self.log = log
|
self.log = log
|
||||||
self.log('Converting RTF to XML...')
|
self.log('Converting RTF to XML...')
|
||||||
|
fname = self.preprocess(stream.name)
|
||||||
try:
|
try:
|
||||||
xml = self.generate_xml(stream.name)
|
xml = self.generate_xml(fname)
|
||||||
except RtfInvalidCodeException:
|
except RtfInvalidCodeException:
|
||||||
raise ValueError(_('This RTF file has a feature calibre does not '
|
raise ValueError(_('This RTF file has a feature calibre does not '
|
||||||
'support. Convert it to HTML first and then try it.'))
|
'support. Convert it to HTML first and then try it.'))
|
||||||
|
344
src/calibre/ebooks/rtf/preprocess.py
Normal file
344
src/calibre/ebooks/rtf/preprocess.py
Normal file
@ -0,0 +1,344 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||||
|
from __future__ import with_statement
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, Gerendi Sandor Attila'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
"""
|
||||||
|
RTF tokenizer and token parser. v.1.0 (1/17/2010)
|
||||||
|
Author: Gerendi Sandor Attila
|
||||||
|
|
||||||
|
At this point this will tokenize a RTF file then rebuild it from the tokens.
|
||||||
|
In the process the UTF8 tokens are altered to be supported by the RTF2XML and also remain RTF specification compliant.
|
||||||
|
"""
|
||||||
|
|
||||||
|
class tokenDelimitatorStart(object):
    '''Token for the ``{`` character found in the RTF data.'''

    def __init__(self):
        pass

    def toRTF(self):
        # Text written back when the token list is serialized.
        return b'{'

    def __repr__(self):
        return '{'
|
||||||
|
|
||||||
|
class tokenDelimitatorEnd(object):
    '''Token for the ``}`` character found in the RTF data.'''

    def __init__(self):
        pass

    def toRTF(self):
        # Text written back when the token list is serialized.
        return b'}'

    def __repr__(self):
        return '}'
|
||||||
|
|
||||||
|
class tokenControlWord(object):
    '''
    Control word without a numeric argument.

    ``name`` is the control word text and ``separator`` is the text stored
    alongside it (empty by default); both are emitted verbatim, in that
    order, by :meth:`toRTF`.
    '''

    def __init__(self, name, separator = ''):
        self.name = name
        self.separator = separator

    def toRTF(self):
        return self.name + self.separator

    def __repr__(self):
        # Same text as the serialized form.
        return self.toRTF()
|
||||||
|
|
||||||
|
class tokenControlWordWithNumericArgument(object):
    '''
    Control word followed by a numeric argument.

    Serialized as ``name``, then ``repr(argument)``, then ``separator``
    (empty by default).
    '''

    def __init__(self, name, argument, separator = ''):
        self.name = name
        self.argument = argument
        self.separator = separator

    def toRTF(self):
        return self.name + repr(self.argument) + self.separator

    def __repr__(self):
        # Same text as the serialized form.
        return self.toRTF()
|
||||||
|
|
||||||
|
class tokenControlSymbol(object):
    '''Control symbol; ``name`` holds its full text and is emitted verbatim.'''

    def __init__(self, name):
        self.name = name

    def toRTF(self):
        return self.name

    def __repr__(self):
        return self.name
|
||||||
|
|
||||||
|
class tokenData(object):
    '''Run of plain data; ``data`` is emitted verbatim.'''

    def __init__(self, data):
        self.data = data

    def toRTF(self):
        return self.data

    def __repr__(self):
        return self.data
|
||||||
|
|
||||||
|
class tokenBinN(object):
    r'''
    ``\bin`` control word together with its payload.

    Serialized as ``\bin``, the length of ``data``, ``separator`` (empty by
    default) and finally ``data`` itself.
    '''

    def __init__(self, data, separator = ''):
        self.data = data
        self.separator = separator

    def toRTF(self):
        # The numeric argument is always recomputed from the payload length.
        return "\\bin" + repr(len(self.data)) + self.separator + self.data

    def __repr__(self):
        # Same text as the serialized form.
        return self.toRTF()
|
||||||
|
|
||||||
|
class token8bitChar(object):
    r'''
    ``\'`` escape; ``data`` holds the characters that follow the ``\'``.
    '''

    def __init__(self, data):
        self.data = data

    def toRTF(self):
        return "\\'" + self.data

    def __repr__(self):
        # Same text as the serialized form.
        return self.toRTF()
|
||||||
|
|
||||||
|
class tokenUnicode(object):
    r'''
    ``\u`` control word plus the replacement tokens that follow it.

    :param data: numeric argument of the ``\u`` control word.
    :param separator: separator text stored with the token (kept for
                      reference; :meth:`toRTF` always emits a single space).
    :param current_ucn: upper bound on how many replacement tokens are
                        written out.
    :param eqList: replacement tokens (objects with a ``toRTF()`` method);
                   defaults to a new empty list for every instance.
    '''

    def __init__(self, data, separator = '', current_ucn = 1, eqList = None):
        self.data = data
        self.separator = separator
        self.current_ucn = current_ucn
        # A fresh list per instance; a shared default list would leak
        # replacement tokens between unrelated tokens.
        self.eqList = [] if eqList is None else eqList

    def toRTF(self):
        r'''
        Return ``\ucN\uDATA `` followed by at most N replacement tokens,
        where N is ``current_ucn`` capped at ``len(eqList)``.
        '''
        ucn = self.current_ucn
        if len(self.eqList) < ucn:
            ucn = len(self.eqList)
        # Announce how many replacement tokens actually follow. This is the
        # text of a '\uc' control word with numeric argument ucn and no
        # separator.
        parts = ['\\uc' + repr(ucn), '\\u' + repr(self.data) + ' ']
        # Emit no more replacements than were announced (none when ucn <= 0).
        for eq in self.eqList[:max(ucn, 0)]:
            parts.append(eq.toRTF())
        return ''.join(parts)

    def __repr__(self):
        return '\\u' + repr(self.data)
|
||||||
|
|
||||||
|
|
||||||
|
def isAsciiLetter(value):
    '''Return True if ``value`` lies in the range a-z or in the range A-Z.'''
    if 'a' <= value <= 'z':
        return True
    return 'A' <= value <= 'Z'
|
||||||
|
|
||||||
|
def isDigit(value):
    '''Return True if ``value`` lies in the range 0-9.'''
    return '0' <= value <= '9'
|
||||||
|
|
||||||
|
def isChar(value, char):
    '''Return True if ``value`` equals the character ``char``.'''
    matches = (value == char)
    return matches
|
||||||
|
|
||||||
|
def isString(buffer, string):
    '''Return True if ``buffer`` equals ``string``.'''
    matches = (buffer == string)
    return matches
|
||||||
|
|
||||||
|
|
||||||
|
class RtfTokenParser(object):
    r'''
    Post-process the flat token list produced by the tokenizer.

    Two passes run over ``tokens`` on construction: :meth:`process` folds
    every ``\'`` control symbol and the two characters after it into a
    ``token8bitChar``; :meth:`processUnicode` turns every ``\u`` control
    word into a ``tokenUnicode`` and drops the replacement characters that
    follow it. :meth:`toRTF` serializes the resulting list.

    Malformed input is reported by raising ``BaseException``.
    '''

    def __init__(self, tokens):
        self.tokens = tokens
        self.process()
        self.processUnicode()

    def process(self):
        r'''
        Merge each ``\'`` control symbol with the data token that follows.

        The first two characters of that data become a ``token8bitChar``;
        anything left over stays behind as a ``tokenData``.
        '''
        i = 0
        newTokens = []
        while i < len(self.tokens):
            if isinstance(self.tokens[i], tokenControlSymbol):
                if isString(self.tokens[i].name, "\\'"):
                    i = i + 1
                    # The escape may be the very last token; report that the
                    # same way as any other missing payload rather than
                    # failing with an IndexError.
                    if i >= len(self.tokens):
                        raise BaseException('Error: token8bitChar without data.')
                    if not isinstance(self.tokens[i], tokenData):
                        raise BaseException('Error: token8bitChar without data.')
                    if len(self.tokens[i].data) < 2:
                        raise BaseException('Error: token8bitChar without data.')
                    newTokens.append(token8bitChar(self.tokens[i].data[0:2]))
                    if len(self.tokens[i].data) > 2:
                        newTokens.append(tokenData(self.tokens[i].data[2:]))
                    i = i + 1
                    continue

            newTokens.append(self.tokens[i])
            i = i + 1

        self.tokens = list(newTokens)

    def processUnicode(self):
        r'''
        Replace ``\u`` control words by ``tokenUnicode`` objects.

        A stack tracks the current ``\uc`` value per ``{ }`` group (initial
        value 1). After each ``\u`` up to that many characters/tokens of
        replacement text are consumed; they are then discarded, so the
        emitted ``tokenUnicode`` carries an empty replacement list.
        '''
        i = 0
        newTokens = []
        ucNbStack = [1]
        while i < len(self.tokens):
            if isinstance(self.tokens[i], tokenDelimitatorStart):
                # A new group starts with the value of the enclosing group.
                ucNbStack.append(ucNbStack[-1])
                newTokens.append(self.tokens[i])
                i = i + 1
                continue
            if isinstance(self.tokens[i], tokenDelimitatorEnd):
                # Never drop the outermost entry: an unbalanced '}' would
                # otherwise leave the stack empty and break every later
                # lookup of the current value.
                if len(ucNbStack) > 1:
                    ucNbStack.pop()
                newTokens.append(self.tokens[i])
                i = i + 1
                continue
            if isinstance(self.tokens[i], tokenControlWordWithNumericArgument):
                if isString(self.tokens[i].name, '\\uc'):
                    ucNbStack[-1] = self.tokens[i].argument
                    newTokens.append(self.tokens[i])
                    i = i + 1
                    continue
                if isString(self.tokens[i].name, '\\u'):
                    x = i           # index of the \u token itself
                    j = 0           # replacement units consumed so far
                    i = i + 1
                    replace = []
                    partialData = None
                    ucn = ucNbStack[-1]
                    while (i < len(self.tokens)) and (j < ucn):
                        # Replacement text never crosses a group boundary.
                        if isinstance(self.tokens[i], tokenDelimitatorStart):
                            break
                        if isinstance(self.tokens[i], tokenDelimitatorEnd):
                            break
                        if isinstance(self.tokens[i], tokenData):
                            if len(self.tokens[i].data) >= ucn - j:
                                # This data token completes the replacement;
                                # whatever is left of it is ordinary text.
                                replace.append(tokenData(self.tokens[i].data[0 : ucn - j]))
                                if len(self.tokens[i].data) > ucn - j:
                                    partialData = tokenData(self.tokens[i].data[ucn - j:])
                                i = i + 1
                                break
                            else:
                                replace.append(self.tokens[i])
                                j = j + len(self.tokens[i].data)
                                i = i + 1
                                continue
                        if isinstance(self.tokens[i], token8bitChar) or isinstance(self.tokens[i], tokenBinN):
                            replace.append(self.tokens[i])
                            i = i + 1
                            j = j + 1
                            continue
                        raise BaseException('Error: incorect utf replacement.')

                    #calibre rtf2xml does not support utfreplace
                    replace = []

                    newTokens.append(tokenUnicode(self.tokens[x].argument, self.tokens[x].separator, ucNbStack[-1], replace))
                    if partialData is not None:
                        newTokens.append(partialData)
                    continue

            newTokens.append(self.tokens[i])
            i = i + 1

        self.tokens = list(newTokens)

    def toRTF(self):
        '''Return the concatenated ``toRTF()`` output of all tokens.'''
        return "".join([token.toRTF() for token in self.tokens])
|
||||||
|
|
||||||
|
|
||||||
|
class RtfTokenizer():
    '''
    Split raw RTF text into a flat list of token objects.

    The constructor tokenizes immediately; the result is available as
    ``self.tokens`` and can be turned back into text with ``toRTF()``.
    '''

    def __init__(self, rtfData):
        '''
        :param rtfData: the RTF document to tokenize. It is indexed one
                        character at a time and sliced, so it must be a
                        string-like sequence.
        '''
        self.tokens = []
        self.rtfData = rtfData
        self.tokenize()

    def _flushData(self, lastDataStart, end):
        '''
        Emit the pending run of plain data ``rtfData[lastDataStart:end]`` as
        a tokenData, if a run is open (``lastDataStart > -1``).

        :return: -1, the "no open data run" marker, to be stored back into
                 the caller's ``lastDataStart``.
        '''
        if lastDataStart > -1:
            self.tokens.append(tokenData(self.rtfData[lastDataStart : end]))
        return -1

    def tokenize(self):
        '''
        Walk ``self.rtfData`` once and append tokens to ``self.tokens``.

        Recognised constructs: ``{`` and ``}`` group delimiters, control
        words (a backslash followed by ASCII letters, an optional numeric
        argument and an optional single space delimiter), control symbols
        (a backslash followed by one non-letter character) and runs of plain
        data between them.

        :raises BaseException: on a backslash at the very end of the input,
                               on a control word or numeric argument that
                               runs to the end of the input, or on a numeric
                               argument with more than 10 digits.
        '''
        i = 0
        lastDataStart = -1
        while i < len(self.rtfData):

            if isChar(self.rtfData[i], '{'):
                lastDataStart = self._flushData(lastDataStart, i)
                self.tokens.append(tokenDelimitatorStart())
                i = i + 1
                continue

            if isChar(self.rtfData[i], '}'):
                lastDataStart = self._flushData(lastDataStart, i)
                self.tokens.append(tokenDelimitatorEnd())
                i = i + 1
                continue

            if isChar(self.rtfData[i], '\\'):
                if i + 1 >= len(self.rtfData):
                    raise BaseException('Error: Control character found at the end of the document.')

                lastDataStart = self._flushData(lastDataStart, i)

                tokenStart = i
                i = i + 1

                #Control Words
                if isAsciiLetter(self.rtfData[i]):
                    #consume <ASCII Letter Sequence>
                    consumed = False
                    while i < len(self.rtfData):
                        if not isAsciiLetter(self.rtfData[i]):
                            tokenEnd = i
                            consumed = True
                            break
                        i = i + 1

                    if not consumed:
                        raise BaseException('Error (at:%d): Control Word without end.'%(tokenStart))

                    #we have numeric argument before delimiter
                    if isChar(self.rtfData[i], '-') or isDigit(self.rtfData[i]):
                        # A '-' is a sign only when a digit follows it. Step
                        # over it explicitly so the digit loop below sees the
                        # digits; otherwise a negative argument such as
                        # \u-3913 could be left unconsumed and the word
                        # emitted without its argument.
                        if isChar(self.rtfData[i], '-') and i + 1 < len(self.rtfData) \
                                and isDigit(self.rtfData[i + 1]):
                            i = i + 1

                        #consume the numeric argument
                        consumed = False
                        l = 0
                        while i < len(self.rtfData):
                            if not isDigit(self.rtfData[i]):
                                consumed = True
                                break
                            l = l + 1
                            i = i + 1
                            if l > 10 :
                                # Format with the int itself: '%d' % [x]
                                # raises TypeError and hides this error.
                                raise BaseException('Error (at:%d): Too many digits in control word numeric argument.'%(tokenStart))

                        if not consumed:
                            raise BaseException('Error (at:%d): Control Word without numeric argument end.'%(tokenStart))

                    separator = ''
                    if isChar(self.rtfData[i], ' '):
                        separator = ' '

                    controlWord = self.rtfData[tokenStart: tokenEnd]
                    if tokenEnd < i:
                        # int() accepts the optional leading '-' consumed above
                        value = int(self.rtfData[tokenEnd: i])
                        if isString(controlWord, "\\bin"):
                            # \binN: jump over the next N characters and keep
                            # the whole span as one opaque token.
                            # NOTE(review): the jump starts at the delimiter,
                            # not after it -- confirm this matches what
                            # tokenBinN expects.
                            i = i + value
                            self.tokens.append(tokenBinN(self.rtfData[tokenStart:i], separator))
                        else:
                            self.tokens.append(tokenControlWordWithNumericArgument(controlWord, value, separator))
                    else:
                        self.tokens.append(tokenControlWord(controlWord, separator))

                    #space delimiter, we should discard it
                    # (bounds check: the \bin jump can move i past the end)
                    if i < len(self.rtfData) and self.rtfData[i] == ' ':
                        i = i + 1

                #Control Symbol
                else:
                    self.tokens.append(tokenControlSymbol(self.rtfData[tokenStart : i + 1]))
                    i = i + 1
                continue

            if lastDataStart < 0:
                lastDataStart = i
            i = i + 1

        # NOTE(review): a data run still open here (text after the last
        # delimiter or control token) is not turned into a token, so it is
        # absent from toRTF() output. Left unchanged; confirm whether the
        # omission is intended.

    def toRTF(self):
        '''Return the concatenation of every token's own RTF rendering.'''
        return "".join([token.toRTF() for token in self.tokens])
|
||||||
|
|
||||||
|
|
||||||
|
# Command-line entry point: tokenize the given RTF file, run the token
# parser over it and overwrite the file in place with the result.
if __name__ == "__main__":
    import sys
    if len(sys.argv) < 2:
        # Show the real program name; '%prog' is an optparse placeholder and
        # is not expanded by a plain print.
        print ("Usage: %s rtfFileToConvert" % sys.argv[0])
        sys.exit()

    # 'with' guarantees the handle is closed even if read() fails
    with open(sys.argv[1], 'rb') as f:
        data = f.read()

    tokenizer = RtfTokenizer(data)
    parsedTokens = RtfTokenParser(tokenizer.tokens)

    data = parsedTokens.toRTF()

    # Write in binary mode to mirror the binary read above; text mode would
    # apply newline translation on some platforms and alter the content.
    with open(sys.argv[1], 'wb') as f:
        f.write(data)
|
||||||
|
|
||||||
|
|
@ -10,11 +10,12 @@ from PyQt4.QtGui import QFileDialog, QMessageBox, QPixmap, QFileIconProvider, \
|
|||||||
ORG_NAME = 'KovidsBrain'
|
ORG_NAME = 'KovidsBrain'
|
||||||
APP_UID = 'libprs500'
|
APP_UID = 'libprs500'
|
||||||
from calibre import islinux, iswindows, isosx
|
from calibre import islinux, iswindows, isosx
|
||||||
from calibre.utils.config import Config, ConfigProxy, dynamic
|
from calibre.utils.config import Config, ConfigProxy, dynamic, JSONConfig
|
||||||
from calibre.utils.localization import set_qt_translator
|
from calibre.utils.localization import set_qt_translator
|
||||||
from calibre.ebooks.metadata.meta import get_metadata, metadata_from_formats
|
from calibre.ebooks.metadata.meta import get_metadata, metadata_from_formats
|
||||||
from calibre.ebooks.metadata import MetaInformation
|
from calibre.ebooks.metadata import MetaInformation
|
||||||
|
|
||||||
|
gprefs = JSONConfig('gui')
|
||||||
|
|
||||||
NONE = QVariant() #: Null value to return from the data function of item models
|
NONE = QVariant() #: Null value to return from the data function of item models
|
||||||
|
|
||||||
|
@ -4,10 +4,14 @@ __license__ = 'GPL 3'
|
|||||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import os
|
||||||
|
from optparse import OptionParser
|
||||||
|
|
||||||
from calibre.customize.conversion import OptionRecommendation, DummyReporter
|
from calibre.customize.conversion import OptionRecommendation, DummyReporter
|
||||||
from calibre.ebooks.conversion.plumber import Plumber
|
from calibre.ebooks.conversion.plumber import Plumber
|
||||||
# ?from calibre.library.catalog import Catalog
|
from calibre.customize.ui import plugin_for_catalog_format
|
||||||
from calibre.utils.logging import Log
|
from calibre.utils.logging import Log
|
||||||
|
from calibre.gui2 import choose_dir, Application
|
||||||
|
|
||||||
def gui_convert(input, output, recommendations, notification=DummyReporter(),
|
def gui_convert(input, output, recommendations, notification=DummyReporter(),
|
||||||
abort_after_input_dump=False, log=None):
|
abort_after_input_dump=False, log=None):
|
||||||
@ -21,7 +25,7 @@ def gui_convert(input, output, recommendations, notification=DummyReporter(),
|
|||||||
|
|
||||||
plumber.run()
|
plumber.run()
|
||||||
|
|
||||||
def gui_catalog(fmt, title, dbspec, ids, out_file_name,
|
def gui_catalog(fmt, title, dbspec, ids, out_file_name, fmt_options,
|
||||||
notification=DummyReporter(), log=None):
|
notification=DummyReporter(), log=None):
|
||||||
if log is None:
|
if log is None:
|
||||||
log = Log()
|
log = Log()
|
||||||
@ -33,19 +37,25 @@ def gui_catalog(fmt, title, dbspec, ids, out_file_name,
|
|||||||
else: # To be implemented in the future
|
else: # To be implemented in the future
|
||||||
pass
|
pass
|
||||||
|
|
||||||
# Implement the interface to the catalog generating code here
|
# Create a minimal OptionParser that we can append to
|
||||||
#db
|
parser = OptionParser()
|
||||||
log("gui2.convert.gui_conversion:gui_catalog()")
|
args = []
|
||||||
log("fmt: %s" % fmt)
|
parser.add_option("--verbose", action="store_true", dest="verbose", default=True)
|
||||||
log("title: %s" % title)
|
opts, args = parser.parse_args()
|
||||||
log("dbspec: %s" % dbspec)
|
|
||||||
log("ids: %s" % ids)
|
# Populate opts
|
||||||
log("out_file_name: %s" % out_file_name)
|
opts.ids = ids
|
||||||
|
opts.search_text = None
|
||||||
|
opts.sort_by = None
|
||||||
|
|
||||||
|
# Extract the option dictionary to comma-separated lists
|
||||||
|
for option in fmt_options:
|
||||||
|
setattr(opts,option, ','.join(fmt_options[option]))
|
||||||
|
|
||||||
|
# Fetch and run the plugin for fmt
|
||||||
|
plugin = plugin_for_catalog_format(fmt)
|
||||||
|
plugin.run(out_file_name, opts, db)
|
||||||
|
|
||||||
# This needs to call the .run() method of the plugin associated with fmt
|
|
||||||
# Needs to set up options before the call
|
|
||||||
# catalog = Catalog(out_file_name, options, dbspec)
|
|
||||||
# Can I call library.cli:catalog_option_parser()?
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -12,15 +12,18 @@ from PyQt4.Qt import QDialog, QWidget
|
|||||||
|
|
||||||
from calibre.customize.ui import config
|
from calibre.customize.ui import config
|
||||||
from calibre.gui2.dialogs.catalog_ui import Ui_Dialog
|
from calibre.gui2.dialogs.catalog_ui import Ui_Dialog
|
||||||
from calibre.gui2 import dynamic
|
from calibre.gui2 import gprefs, dynamic
|
||||||
from calibre.customize.ui import available_catalog_formats, catalog_plugins
|
from calibre.customize.ui import available_catalog_formats, catalog_plugins
|
||||||
from calibre.gui2.catalog.catalog_csv_xml import PluginWidget
|
from calibre.gui2.catalog.catalog_csv_xml import PluginWidget
|
||||||
|
|
||||||
class Catalog(QDialog, Ui_Dialog):
|
class Catalog(QDialog, Ui_Dialog):
|
||||||
|
''' Catalog Dialog builder'''
|
||||||
|
widgets = []
|
||||||
|
|
||||||
def __init__(self, parent, dbspec, ids):
|
def __init__(self, parent, dbspec, ids):
|
||||||
import re, cStringIO
|
import re, cStringIO
|
||||||
from calibre import prints as info
|
from calibre import prints as info
|
||||||
|
from calibre.gui2 import dynamic
|
||||||
from PyQt4.uic import compileUi
|
from PyQt4.uic import compileUi
|
||||||
|
|
||||||
QDialog.__init__(self, parent)
|
QDialog.__init__(self, parent)
|
||||||
@ -42,6 +45,7 @@ class Catalog(QDialog, Ui_Dialog):
|
|||||||
self.fmts = []
|
self.fmts = []
|
||||||
|
|
||||||
from calibre.customize.builtins import plugins as builtin_plugins
|
from calibre.customize.builtins import plugins as builtin_plugins
|
||||||
|
from calibre.customize import CatalogPlugin
|
||||||
|
|
||||||
for plugin in catalog_plugins():
|
for plugin in catalog_plugins():
|
||||||
if plugin.name in config['disabled_plugins']:
|
if plugin.name in config['disabled_plugins']:
|
||||||
@ -49,38 +53,30 @@ class Catalog(QDialog, Ui_Dialog):
|
|||||||
|
|
||||||
name = plugin.name.lower().replace(' ', '_')
|
name = plugin.name.lower().replace(' ', '_')
|
||||||
if type(plugin) in builtin_plugins:
|
if type(plugin) in builtin_plugins:
|
||||||
info("Adding tab for builtin Catalog plugin %s" % plugin.name)
|
#info("Adding widget for builtin Catalog plugin %s" % plugin.name)
|
||||||
try:
|
try:
|
||||||
catalog_widget = __import__('calibre.gui2.catalog.'+name,
|
catalog_widget = __import__('calibre.gui2.catalog.'+name,
|
||||||
fromlist=[1])
|
fromlist=[1])
|
||||||
pw = catalog_widget.PluginWidget()
|
pw = catalog_widget.PluginWidget()
|
||||||
pw.initialize()
|
pw.initialize(name)
|
||||||
pw.ICON = I('forward.svg')
|
pw.ICON = I('forward.svg')
|
||||||
page = self.tabs.addTab(pw,pw.TITLE)
|
self.widgets.append(pw)
|
||||||
[self.fmts.append([file_type, pw.sync_enabled]) for file_type in plugin.file_types]
|
[self.fmts.append([file_type.upper(), pw.sync_enabled,pw]) for file_type in plugin.file_types]
|
||||||
info("\tSupported formats: %s" % plugin.file_types)
|
|
||||||
info("\tsync_enabled: %s" % pw.sync_enabled)
|
|
||||||
|
|
||||||
except ImportError:
|
except ImportError:
|
||||||
info("ImportError with %s" % name)
|
info("ImportError with %s" % name)
|
||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
# Test to see if .ui and .py files exist in tmpdir/calibre_plugin_resources
|
# Load dynamic tab
|
||||||
form = os.path.join(tempfile.gettempdir(),
|
form = os.path.join(plugin.resources_path,'%s.ui' % name)
|
||||||
'calibre_plugin_resources','%s.ui' % name)
|
klass = os.path.join(plugin.resources_path,'%s.py' % name)
|
||||||
klass = os.path.join(tempfile.gettempdir(),
|
compiled_form = os.path.join(plugin.resources_path,'%s_ui.py' % name)
|
||||||
'calibre_plugin_resources','%s.py' % name)
|
|
||||||
compiled_form = os.path.join(tempfile.gettempdir(),
|
|
||||||
'calibre_plugin_resources','%s_ui.py' % name)
|
|
||||||
plugin_resources = os.path.join(tempfile.gettempdir(),'calibre_plugin_resources')
|
|
||||||
|
|
||||||
if os.path.exists(form) and os.path.exists(klass):
|
if os.path.exists(form) and os.path.exists(klass):
|
||||||
info("Adding tab for user-installed Catalog plugin %s" % plugin.name)
|
#info("Adding widget for user-installed Catalog plugin %s" % plugin.name)
|
||||||
|
|
||||||
# Compile the form provided in plugin.zip
|
# Compile the .ui form provided in plugin.zip
|
||||||
if not os.path.exists(compiled_form) or \
|
if not os.path.exists(compiled_form):
|
||||||
os.stat(form).st_mtime > os.stat(compiled_form).st_mtime:
|
# info('\tCompiling form', form)
|
||||||
info('\tCompiling form', form)
|
|
||||||
buf = cStringIO.StringIO()
|
buf = cStringIO.StringIO()
|
||||||
compileUi(form, buf)
|
compileUi(form, buf)
|
||||||
dat = buf.getvalue()
|
dat = buf.getvalue()
|
||||||
@ -88,35 +84,41 @@ class Catalog(QDialog, Ui_Dialog):
|
|||||||
re.DOTALL).sub(r'_("\1")', dat)
|
re.DOTALL).sub(r'_("\1")', dat)
|
||||||
open(compiled_form, 'wb').write(dat)
|
open(compiled_form, 'wb').write(dat)
|
||||||
|
|
||||||
# Import the Catalog class from the dynamic .py file
|
# Import the dynamic PluginWidget() from .py file provided in plugin.zip
|
||||||
try:
|
try:
|
||||||
sys.path.insert(0, plugin_resources)
|
sys.path.insert(0, plugin.resources_path)
|
||||||
catalog_widget = __import__(name, fromlist=[1])
|
catalog_widget = __import__(name, fromlist=[1])
|
||||||
dpw = catalog_widget.PluginWidget()
|
pw = catalog_widget.PluginWidget()
|
||||||
dpw.initialize()
|
pw.initialize(name)
|
||||||
dpw.ICON = I('forward.svg')
|
pw.ICON = I('forward.svg')
|
||||||
page = self.tabs.addTab(dpw, dpw.TITLE)
|
self.widgets.append(pw)
|
||||||
[self.fmts.append([file_type, dpw.sync_enabled]) for file_type in plugin.file_types]
|
[self.fmts.append([file_type.upper(), pw.sync_enabled,pw]) for file_type in plugin.file_types]
|
||||||
info("\tSupported formats: %s" % plugin.file_types)
|
|
||||||
info("\tsync_enabled: %s" % dpw.sync_enabled)
|
|
||||||
except ImportError:
|
except ImportError:
|
||||||
info("ImportError with %s" % name)
|
info("ImportError with %s" % name)
|
||||||
continue
|
continue
|
||||||
finally:
|
finally:
|
||||||
sys.path.remove(plugin_resources)
|
sys.path.remove(plugin.resources_path)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
info("No dynamic tab resources found for %s" % name)
|
info("No dynamic tab resources found for %s" % name)
|
||||||
|
|
||||||
|
self.widgets = sorted(self.widgets, key=lambda x:(x.TITLE, x.TITLE))
|
||||||
|
for pw in self.widgets:
|
||||||
|
page = self.tabs.addTab(pw,pw.TITLE)
|
||||||
|
|
||||||
# Generate a sorted list of installed catalog formats/sync_enabled pairs
|
# Generate a sorted list of installed catalog formats/sync_enabled pairs
|
||||||
# Generate a parallel list of sync_enabled[True|False]ß
|
fmts = sorted([x[0] for x in self.fmts])
|
||||||
self.fmts = sorted([x[0].upper() for x in self.fmts])
|
|
||||||
|
self.sync_enabled_formats = []
|
||||||
|
for fmt in self.fmts:
|
||||||
|
if fmt[1]:
|
||||||
|
self.sync_enabled_formats.append(fmt[0])
|
||||||
|
|
||||||
# Callback when format changes
|
# Callback when format changes
|
||||||
self.format.currentIndexChanged.connect(self.format_changed)
|
self.format.currentIndexChanged.connect(self.format_changed)
|
||||||
|
|
||||||
# Add the installed catalog format list to the format QComboBox
|
# Add the installed catalog format list to the format QComboBox
|
||||||
self.format.addItems(self.fmts)
|
self.format.addItems(fmts)
|
||||||
|
|
||||||
pref = dynamic.get('catalog_preferred_format', 'CSV')
|
pref = dynamic.get('catalog_preferred_format', 'CSV')
|
||||||
idx = self.format.findText(pref)
|
idx = self.format.findText(pref)
|
||||||
@ -127,9 +129,8 @@ class Catalog(QDialog, Ui_Dialog):
|
|||||||
self.sync.setChecked(dynamic.get('catalog_sync_to_device', True))
|
self.sync.setChecked(dynamic.get('catalog_sync_to_device', True))
|
||||||
|
|
||||||
def format_changed(self, idx):
|
def format_changed(self, idx):
|
||||||
print "format_changed(idx): idx: %d" % idx
|
|
||||||
cf = unicode(self.format.currentText())
|
cf = unicode(self.format.currentText())
|
||||||
if cf in ('EPUB', 'MOBI'):
|
if cf in self.sync_enabled_formats:
|
||||||
self.sync.setEnabled(True)
|
self.sync.setEnabled(True)
|
||||||
else:
|
else:
|
||||||
self.sync.setDisabled(True)
|
self.sync.setDisabled(True)
|
||||||
|
@ -239,23 +239,35 @@ def fetch_scheduled_recipe(arg):
|
|||||||
def generate_catalog(parent, dbspec, ids):
|
def generate_catalog(parent, dbspec, ids):
|
||||||
from calibre.gui2.dialogs.catalog import Catalog
|
from calibre.gui2.dialogs.catalog import Catalog
|
||||||
|
|
||||||
# Build the Catalog dialog
|
# Build the Catalog dialog in gui2.dialogs.catalog
|
||||||
d = Catalog(parent, dbspec, ids)
|
d = Catalog(parent, dbspec, ids)
|
||||||
|
|
||||||
if d.exec_() != d.Accepted:
|
if d.exec_() != d.Accepted:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# Create the output file
|
# Create the output file
|
||||||
out = PersistentTemporaryFile(suffix='_catalog_out.'+d.catalog_format.lower())
|
out = PersistentTemporaryFile(suffix='_catalog_out.'+d.catalog_format.lower())
|
||||||
|
|
||||||
|
# Retrieve plugin options
|
||||||
|
fmt_options = {}
|
||||||
|
for x in range(d.tabs.count()):
|
||||||
|
if str(d.tabs.tabText(x)).find(str(d.catalog_format)) > -1:
|
||||||
|
for fmt in d.fmts:
|
||||||
|
if fmt[0] == d.catalog_format:
|
||||||
|
fmt_options = fmt[2].options()
|
||||||
|
# print "gui2.tools:generate_catalog(): options for %s: %s" % (fmt[0], fmt_options)
|
||||||
|
|
||||||
args = [
|
args = [
|
||||||
d.catalog_format,
|
d.catalog_format,
|
||||||
d.catalog_title,
|
d.catalog_title,
|
||||||
dbspec,
|
dbspec,
|
||||||
ids,
|
ids,
|
||||||
out.name,
|
out.name,
|
||||||
|
fmt_options
|
||||||
]
|
]
|
||||||
out.close()
|
out.close()
|
||||||
|
|
||||||
|
# This calls gui2.convert.gui_conversion:gui_catalog()
|
||||||
return 'gui_catalog', args, _('Generate catalog'), out.name, d.catalog_sync, \
|
return 'gui_catalog', args, _('Generate catalog'), out.name, d.catalog_sync, \
|
||||||
d.catalog_title
|
d.catalog_title
|
||||||
|
|
||||||
|
@ -9,7 +9,7 @@ __docformat__ = 'restructuredtext en'
|
|||||||
|
|
||||||
'''The main GUI'''
|
'''The main GUI'''
|
||||||
|
|
||||||
import os, sys, textwrap, collections, time
|
import atexit, os, shutil, sys, tempfile, textwrap, collections, time
|
||||||
from xml.parsers.expat import ExpatError
|
from xml.parsers.expat import ExpatError
|
||||||
from Queue import Queue, Empty
|
from Queue import Queue, Empty
|
||||||
from threading import Thread
|
from threading import Thread
|
||||||
@ -31,7 +31,7 @@ from calibre.utils.ipc.server import Server
|
|||||||
from calibre.gui2 import warning_dialog, choose_files, error_dialog, \
|
from calibre.gui2 import warning_dialog, choose_files, error_dialog, \
|
||||||
question_dialog,\
|
question_dialog,\
|
||||||
pixmap_to_data, choose_dir, \
|
pixmap_to_data, choose_dir, \
|
||||||
Dispatcher, \
|
Dispatcher, gprefs, \
|
||||||
available_height, \
|
available_height, \
|
||||||
max_available_height, config, info_dialog, \
|
max_available_height, config, info_dialog, \
|
||||||
available_width, GetMetadata
|
available_width, GetMetadata
|
||||||
@ -357,7 +357,7 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
|
|||||||
cm.addAction(_('Bulk convert'))
|
cm.addAction(_('Bulk convert'))
|
||||||
cm.addSeparator()
|
cm.addSeparator()
|
||||||
ac = cm.addAction(
|
ac = cm.addAction(
|
||||||
_('Create catalog of the books in your calibre library'))
|
_('Create catalog of books in your calibre library'))
|
||||||
ac.triggered.connect(self.generate_catalog)
|
ac.triggered.connect(self.generate_catalog)
|
||||||
self.action_convert.setMenu(cm)
|
self.action_convert.setMenu(cm)
|
||||||
self._convert_single_hook = partial(self.convert_ebook, bulk=False)
|
self._convert_single_hook = partial(self.convert_ebook, bulk=False)
|
||||||
@ -518,7 +518,21 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
|
|||||||
self.connect(self.library_view.model(), SIGNAL('count_changed(int)'),
|
self.connect(self.library_view.model(), SIGNAL('count_changed(int)'),
|
||||||
self.tags_view.recount)
|
self.tags_view.recount)
|
||||||
self.connect(self.search, SIGNAL('cleared()'), self.tags_view.clear)
|
self.connect(self.search, SIGNAL('cleared()'), self.tags_view.clear)
|
||||||
|
if not gprefs.get('quick_start_guide_added', False):
|
||||||
|
from calibre.ebooks.metadata import MetaInformation
|
||||||
|
mi = MetaInformation(_('Calibre Quick Start Guide'), ['John Schember'])
|
||||||
|
mi.author_sort = 'Schember, John'
|
||||||
|
mi.comments = "A guide to get you up an running with calibre"
|
||||||
|
mi.publisher = 'calibre'
|
||||||
|
self.library_view.model().add_books([P('quick_start.epub')], ['epub'],
|
||||||
|
[mi])
|
||||||
|
gprefs['quick_start_guide_added'] = True
|
||||||
|
self.library_view.model().books_added(1)
|
||||||
|
if hasattr(self, 'db_images'):
|
||||||
|
self.db_images.reset()
|
||||||
|
|
||||||
self.library_view.model().count_changed()
|
self.library_view.model().count_changed()
|
||||||
|
|
||||||
########################### Cover Flow ################################
|
########################### Cover Flow ################################
|
||||||
self.cover_flow = None
|
self.cover_flow = None
|
||||||
if CoverFlow is not None:
|
if CoverFlow is not None:
|
||||||
@ -1008,7 +1022,6 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
|
|||||||
return
|
return
|
||||||
self._add_books(books, to_device)
|
self._add_books(books, to_device)
|
||||||
|
|
||||||
|
|
||||||
def _add_books(self, paths, to_device, on_card=None):
|
def _add_books(self, paths, to_device, on_card=None):
|
||||||
if on_card is None:
|
if on_card is None:
|
||||||
on_card = 'carda' if self.stack.currentIndex() == 2 else 'cardb' if self.stack.currentIndex() == 3 else None
|
on_card = 'carda' if self.stack.currentIndex() == 2 else 'cardb' if self.stack.currentIndex() == 3 else None
|
||||||
@ -1348,24 +1361,29 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
|
|||||||
|
|
||||||
def generate_catalog(self):
|
def generate_catalog(self):
|
||||||
rows = self.library_view.selectionModel().selectedRows()
|
rows = self.library_view.selectionModel().selectedRows()
|
||||||
if not rows:
|
if not rows or len(rows) < 2:
|
||||||
rows = xrange(self.library_view.model().rowCount(QModelIndex()))
|
rows = xrange(self.library_view.model().rowCount(QModelIndex()))
|
||||||
ids = map(self.library_view.model().id, rows)
|
ids = map(self.library_view.model().id, rows)
|
||||||
|
|
||||||
dbspec = None
|
dbspec = None
|
||||||
if not ids:
|
if not ids:
|
||||||
return error_dialog(self, _('No books selected'),
|
return error_dialog(self, _('No books selected'),
|
||||||
_('No books selected to generate catalog for'),
|
_('No books selected to generate catalog for'),
|
||||||
show=True)
|
show=True)
|
||||||
# calibre.gui2.tools:generate_catalog()
|
|
||||||
|
# Calling gui2.tools:generate_catalog()
|
||||||
ret = generate_catalog(self, dbspec, ids)
|
ret = generate_catalog(self, dbspec, ids)
|
||||||
if ret is None:
|
if ret is None:
|
||||||
return
|
return
|
||||||
|
|
||||||
func, args, desc, out, sync, title = ret
|
func, args, desc, out, sync, title = ret
|
||||||
|
|
||||||
fmt = os.path.splitext(out)[1][1:].upper()
|
fmt = os.path.splitext(out)[1][1:].upper()
|
||||||
job = self.job_manager.run_job(
|
job = self.job_manager.run_job(
|
||||||
Dispatcher(self.catalog_generated), func, args=args,
|
Dispatcher(self.catalog_generated), func, args=args,
|
||||||
description=desc)
|
description=desc)
|
||||||
job.catalog_file_path = out
|
job.catalog_file_path = out
|
||||||
|
job.fmt = fmt
|
||||||
job.catalog_sync, job.catalog_title = sync, title
|
job.catalog_sync, job.catalog_title = sync, title
|
||||||
self.status_bar.showMessage(_('Generating %s catalog...')%fmt)
|
self.status_bar.showMessage(_('Generating %s catalog...')%fmt)
|
||||||
|
|
||||||
@ -1380,7 +1398,12 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
|
|||||||
dynamic.set('catalogs_to_be_synced', sync)
|
dynamic.set('catalogs_to_be_synced', sync)
|
||||||
self.status_bar.showMessage(_('Catalog generated.'), 3000)
|
self.status_bar.showMessage(_('Catalog generated.'), 3000)
|
||||||
self.sync_catalogs()
|
self.sync_catalogs()
|
||||||
|
if job.fmt in ['CSV','XML']:
|
||||||
|
export_dir = choose_dir(self, 'Export Catalog Directory',
|
||||||
|
'Select destination for %s.%s' % (job.catalog_title, job.fmt.lower()))
|
||||||
|
if export_dir:
|
||||||
|
destination = os.path.join(export_dir, '%s.%s' % (job.catalog_title, job.fmt.lower()))
|
||||||
|
shutil.copyfile(job.catalog_file_path, destination)
|
||||||
|
|
||||||
############################### Fetch news #################################
|
############################### Fetch news #################################
|
||||||
|
|
||||||
|
@ -40,10 +40,9 @@ class CSV_XML(CatalogPlugin):
|
|||||||
from calibre.utils.logging import Log
|
from calibre.utils.logging import Log
|
||||||
|
|
||||||
log = Log()
|
log = Log()
|
||||||
self.fmt = path_to_output[path_to_output.rfind('.') + 1:]
|
|
||||||
# Update to .partition
|
|
||||||
self.fmt = path_to_output.rpartition('.')[2]
|
self.fmt = path_to_output.rpartition('.')[2]
|
||||||
if opts.verbose:
|
|
||||||
|
if False and opts.verbose:
|
||||||
log("%s:run" % self.name)
|
log("%s:run" % self.name)
|
||||||
log(" path_to_output: %s" % path_to_output)
|
log(" path_to_output: %s" % path_to_output)
|
||||||
log(" Output format: %s" % self.fmt)
|
log(" Output format: %s" % self.fmt)
|
||||||
|
@ -644,6 +644,10 @@ def catalog_option_parser(args):
|
|||||||
output, fmt = validate_command_line(parser, args, log)
|
output, fmt = validate_command_line(parser, args, log)
|
||||||
|
|
||||||
# Add options common to all catalog plugins
|
# Add options common to all catalog plugins
|
||||||
|
parser.add_option('-i', '--ids', default=None, dest='ids',
|
||||||
|
help=_("Comma-separated list of database IDs to catalog.\n"
|
||||||
|
"If declared, --search is ignored.\n"
|
||||||
|
"Default: all"))
|
||||||
parser.add_option('-s', '--search', default=None, dest='search_text',
|
parser.add_option('-s', '--search', default=None, dest='search_text',
|
||||||
help=_("Filter the results by the search query. "
|
help=_("Filter the results by the search query. "
|
||||||
"For the format of the search query, please see "
|
"For the format of the search query, please see "
|
||||||
@ -656,31 +660,6 @@ def catalog_option_parser(args):
|
|||||||
# Add options specific to fmt plugin
|
# Add options specific to fmt plugin
|
||||||
plugin = add_plugin_parser_options(fmt, parser, log)
|
plugin = add_plugin_parser_options(fmt, parser, log)
|
||||||
|
|
||||||
# Merge options from GUI Preferences
|
|
||||||
'''
|
|
||||||
# Placeholder sample code until we implement GUI preferences
|
|
||||||
from calibre.library.save_to_disk import config
|
|
||||||
c = config()
|
|
||||||
for pref in ['asciiize', 'update_metadata', 'write_opf', 'save_cover']:
|
|
||||||
opt = c.get_option(pref)
|
|
||||||
switch = '--dont-'+pref.replace('_', '-')
|
|
||||||
parser.add_option(switch, default=True, action='store_false',
|
|
||||||
help=opt.help+' '+_('Specifying this switch will turn '
|
|
||||||
'this behavior off.'), dest=pref)
|
|
||||||
|
|
||||||
for pref in ['timefmt', 'template', 'formats']:
|
|
||||||
opt = c.get_option(pref)
|
|
||||||
switch = '--'+pref
|
|
||||||
parser.add_option(switch, default=opt.default,
|
|
||||||
help=opt.help, dest=pref)
|
|
||||||
|
|
||||||
for pref in ('replace_whitespace', 'to_lowercase'):
|
|
||||||
opt = c.get_option(pref)
|
|
||||||
switch = '--'+pref.replace('_', '-')
|
|
||||||
parser.add_option(switch, default=False, action='store_true',
|
|
||||||
help=opt.help)
|
|
||||||
'''
|
|
||||||
|
|
||||||
return parser, plugin, log
|
return parser, plugin, log
|
||||||
|
|
||||||
def command_catalog(args, dbpath):
|
def command_catalog(args, dbpath):
|
||||||
@ -693,6 +672,9 @@ def command_catalog(args, dbpath):
|
|||||||
return 1
|
return 1
|
||||||
if opts.verbose:
|
if opts.verbose:
|
||||||
log("library.cli:command_catalog dispatching to plugin %s" % plugin.name)
|
log("library.cli:command_catalog dispatching to plugin %s" % plugin.name)
|
||||||
|
if opts.ids:
|
||||||
|
opts.ids = [int(id) for id in opts.ids.split(',')]
|
||||||
|
|
||||||
with plugin:
|
with plugin:
|
||||||
plugin.run(args[1], opts, get_db(dbpath, opts))
|
plugin.run(args[1], opts, get_db(dbpath, opts))
|
||||||
return 0
|
return 0
|
||||||
|
@ -1634,13 +1634,15 @@ class LibraryDatabase2(LibraryDatabase):
|
|||||||
for i in iter(self):
|
for i in iter(self):
|
||||||
yield i[x]
|
yield i[x]
|
||||||
|
|
||||||
def get_data_as_dict(self, prefix=None, authors_as_string=False):
|
def get_data_as_dict(self, prefix=None, authors_as_string=False, ids=None):
|
||||||
'''
|
'''
|
||||||
Return all metadata stored in the database as a dict. Includes paths to
|
Return all metadata stored in the database as a dict. Includes paths to
|
||||||
the cover and each format.
|
the cover and each format.
|
||||||
|
|
||||||
:param prefix: The prefix for all paths. By default, the prefix is the absolute path
|
:param prefix: The prefix for all paths. By default, the prefix is the absolute path
|
||||||
to the library folder.
|
to the library folder.
|
||||||
|
:param ids: Set of ids to return the data for. If None return data for
|
||||||
|
all entries in database.
|
||||||
'''
|
'''
|
||||||
if prefix is None:
|
if prefix is None:
|
||||||
prefix = self.library_path
|
prefix = self.library_path
|
||||||
@ -1650,11 +1652,14 @@ class LibraryDatabase2(LibraryDatabase):
|
|||||||
data = []
|
data = []
|
||||||
for record in self.data:
|
for record in self.data:
|
||||||
if record is None: continue
|
if record is None: continue
|
||||||
|
db_id = record[FIELD_MAP['id']]
|
||||||
|
if ids is not None and db_id not in ids:
|
||||||
|
continue
|
||||||
x = {}
|
x = {}
|
||||||
for field in FIELDS:
|
for field in FIELDS:
|
||||||
x[field] = record[FIELD_MAP[field]]
|
x[field] = record[FIELD_MAP[field]]
|
||||||
data.append(x)
|
data.append(x)
|
||||||
x['id'] = record[FIELD_MAP['id']]
|
x['id'] = db_id
|
||||||
x['formats'] = []
|
x['formats'] = []
|
||||||
if not x['authors']:
|
if not x['authors']:
|
||||||
x['authors'] = _('Unknown')
|
x['authors'] = _('Unknown')
|
||||||
|
@ -6,7 +6,7 @@ __docformat__ = 'restructuredtext en'
|
|||||||
'''
|
'''
|
||||||
Manage application-wide preferences.
|
Manage application-wide preferences.
|
||||||
'''
|
'''
|
||||||
import os, re, cPickle, textwrap, traceback, plistlib
|
import os, re, cPickle, textwrap, traceback, plistlib, json
|
||||||
from copy import deepcopy
|
from copy import deepcopy
|
||||||
from functools import partial
|
from functools import partial
|
||||||
from optparse import OptionParser as _OptionParser
|
from optparse import OptionParser as _OptionParser
|
||||||
@ -564,23 +564,31 @@ class XMLConfig(dict):
|
|||||||
data types.
|
data types.
|
||||||
'''
|
'''
|
||||||
|
|
||||||
|
EXTENSION = '.plist'
|
||||||
|
|
||||||
def __init__(self, rel_path_to_cf_file):
|
def __init__(self, rel_path_to_cf_file):
|
||||||
dict.__init__(self)
|
dict.__init__(self)
|
||||||
self.file_path = os.path.join(config_dir,
|
self.file_path = os.path.join(config_dir,
|
||||||
*(rel_path_to_cf_file.split('/')))
|
*(rel_path_to_cf_file.split('/')))
|
||||||
self.file_path = os.path.abspath(self.file_path)
|
self.file_path = os.path.abspath(self.file_path)
|
||||||
if not self.file_path.endswith('.plist'):
|
if not self.file_path.endswith(self.EXTENSION):
|
||||||
self.file_path += '.plist'
|
self.file_path += self.EXTENSION
|
||||||
|
|
||||||
self.refresh()
|
self.refresh()
|
||||||
|
|
||||||
|
def raw_to_object(self, raw):
|
||||||
|
return plistlib.readPlistFromString(raw)
|
||||||
|
|
||||||
|
def to_raw(self):
|
||||||
|
return plistlib.writePlistToString(self)
|
||||||
|
|
||||||
def refresh(self):
|
def refresh(self):
|
||||||
d = {}
|
d = {}
|
||||||
if os.path.exists(self.file_path):
|
if os.path.exists(self.file_path):
|
||||||
with ExclusiveFile(self.file_path) as f:
|
with ExclusiveFile(self.file_path) as f:
|
||||||
raw = f.read()
|
raw = f.read()
|
||||||
try:
|
try:
|
||||||
d = plistlib.readPlistFromString(raw) if raw.strip() else {}
|
d = self.raw_to_object(raw) if raw.strip() else {}
|
||||||
except SystemError:
|
except SystemError:
|
||||||
pass
|
pass
|
||||||
except:
|
except:
|
||||||
@ -618,11 +626,21 @@ class XMLConfig(dict):
|
|||||||
if not os.path.exists(dpath):
|
if not os.path.exists(dpath):
|
||||||
os.makedirs(dpath, mode=CONFIG_DIR_MODE)
|
os.makedirs(dpath, mode=CONFIG_DIR_MODE)
|
||||||
with ExclusiveFile(self.file_path) as f:
|
with ExclusiveFile(self.file_path) as f:
|
||||||
raw = plistlib.writePlistToString(self)
|
raw = self.to_raw()
|
||||||
f.seek(0)
|
f.seek(0)
|
||||||
f.truncate()
|
f.truncate()
|
||||||
f.write(raw)
|
f.write(raw)
|
||||||
|
|
||||||
|
class JSONConfig(XMLConfig):
|
||||||
|
|
||||||
|
EXTENSION = '.json'
|
||||||
|
|
||||||
|
def raw_to_object(self, raw):
|
||||||
|
return json.loads(raw.decode('utf-8'))
|
||||||
|
|
||||||
|
def to_raw(self):
|
||||||
|
return json.dumps(self, indent=2)
|
||||||
|
|
||||||
|
|
||||||
def _prefs():
|
def _prefs():
|
||||||
c = Config('global', 'calibre wide preferences')
|
c = Config('global', 'calibre wide preferences')
|
||||||
|
@ -104,6 +104,7 @@ _extra_lang_codes = {
|
|||||||
'en_CY' : _('English (Cyprus)'),
|
'en_CY' : _('English (Cyprus)'),
|
||||||
'en_PK' : _('English (Pakistan)'),
|
'en_PK' : _('English (Pakistan)'),
|
||||||
'en_SG' : _('English (Singapore)'),
|
'en_SG' : _('English (Singapore)'),
|
||||||
|
'en_YE' : _('English (Yemen)'),
|
||||||
'de_AT' : _('German (AT)'),
|
'de_AT' : _('German (AT)'),
|
||||||
'nl' : _('Dutch (NL)'),
|
'nl' : _('Dutch (NL)'),
|
||||||
'nl_BE' : _('Dutch (BE)'),
|
'nl_BE' : _('Dutch (BE)'),
|
||||||
|
Loading…
x
Reference in New Issue
Block a user