Sync to trunk.

John Schember 2009-11-21 21:22:28 -05:00
commit d96542418a
52 changed files with 5724 additions and 4549 deletions

View File

@ -0,0 +1,87 @@
#!/usr/bin/python
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
class FokkeEnSukkeRecipe(BasicNewsRecipe) :
__license__ = 'GPL v3'
__author__ = 'kwetal'
language = 'nl'
description = u'Popular Dutch daily cartoon Fokke en Sukke'
title = u'Fokke en Sukke'
no_stylesheets = True
# For reasons unknown to me the extra css is, on the cartoon pages, inserted in the <body> and not in the <head>. My reader (Sony PRS-600) has a serious issue
# with that: it treats it as content and displays it as is. Setting this property to empty solves this for me.
template_css = ''
INDEX = u'http://foksuk.nl'
# This cover is not as nice as it could be, needs some work
#cover_url = 'http://foksuk.nl/content/wysiwyg/simpleimages/image350.gif'
keep_only_tags = [dict(name='div', attrs={'class' : 'cartoon'})]
def parse_index(self) :
# A list with daynames as they _can_ appear in the index
dayNames = ['maandag', 'dinsdag', 'woensdag', 'donderdag', 'vrijdag', 'zaterdag & zondag']
soup = self.index_to_soup(self.INDEX)
# Find the links for the various cartoons for this week and loop through them
index = soup.find('div', attrs={'class' : 'selectcartoon'})
links = index.findAll('a')
maxIndex = len(links) - 1
articles = []
for i in range(len(links)) :
# The first link does not interest us, as it points to no cartoon. A begin_at parameter in the range() function would be nice.
if i == 0 :
continue
# There can be more than one cartoon for a given day (currently either one or two). If there's only one, there is just a link with the dayname.
# If there are two, there are three links in sequence: <a>dayname</a> <a>1</a> <a>2</a>. In that case we're interested in the last two.
if links[i].renderContents() in dayNames :
# If the link is not in daynames, we processed it already, but if it is, let's see if the next one has '1' as content
if (i + 1 <= maxIndex) and (links[i + 1].renderContents() == '1') :
# Got you! Add it to the list
article = {'title' : links[i].renderContents() + ' 1', 'date' : u'', 'url' : self.INDEX + links[i + 1]['href'], 'description' : ''}
articles.append(article)
# If there is a '1', there should be a '2' as well, but better safe than sorry
if (i + 2 <= maxIndex) and (links[i + 2].renderContents() == '2') :
# Got you! Add it to the list
article = {'title' : links[i].renderContents() + ' 2', 'date' : u'', 'url' : self.INDEX + links[i + 2]['href'], 'description' : ''}
articles.append(article)
else :
# There is only one cartoon for this day. Add it to the list.
article = {'title' : links[i].renderContents(), 'date' : u'', 'url' : self.INDEX + links[i]['href'], 'description' : ''}
articles.append(article)
# Might as well use the weeknumber as title
week = index.find('span', attrs={'class' : 'week'}).renderContents()
return [[week, articles]]
def preprocess_html(self, soup) :
# This method is called for every page, be it cartoon or TOC. We need to process each in its own way
cartoon = soup.find('div', attrs={'class' : 'cartoon'})
if cartoon :
# It is a cartoon. Extract the title.
title = ''
img = soup.find('img', attrs = {'alt' : True})
if img :
title = img['alt']
# Using the 'extra_css' displays it in the <body> and not in the <head>. See comment at the top of this class. Setting the style this way solves that.
tag = Tag(soup, 'div', [('style', 'text-align: center; margin-bottom: 8px')])
tag.insert(0, title)
cartoon.insert(0, tag)
# I have not quite worked out why, but we have to throw out this part of the page. It contains the very same index we processed earlier,
# and Calibre does not like that too much. As far as I can tell it goes into recursion and the result is an empty eBook.
select = cartoon.find('div', attrs={'class' : 'selectcartoon'})
if select :
select.extract()
return cartoon
else :
# It is a TOC. Just return the whole lot.
return soup

View File

@ -43,97 +43,45 @@ class Guardian(BasicNewsRecipe):
#match-stats-summary{font-size:small; font-family:Arial,Helvetica,sans-serif;font-weight:normal;}
'''
def parse_index(self):
def find_sections(self):
soup = self.index_to_soup('http://www.guardian.co.uk/theguardian')
# find cover pic
img = soup.find( 'img',attrs ={'alt':'Guardian digital edition'})
if img is None: return None
else:
if img is not None:
self.cover_url = img['src']
# end find cover pic
sections = []
ans = []
for li in soup.findAll( 'li'):
section = ''
articles = []
if li.a and li.a.has_key('href'):
url = li.a['href']
if 'mainsection' in url:
section = self.tag_to_string(url)
i = len(section)
idx = soup.find('div', id='book-index')
for s in idx.findAll('strong', attrs={'class':'book'}):
a = s.find('a', href=True)
yield (self.tag_to_string(a), a['href'])
index1 = section.rfind('/',0,i)
section = section[index1+1:i]
sections.append(section)
#find the articles in the Main Section start
def find_articles(self, url):
soup = self.index_to_soup(url)
date = strftime('%a, %d %b')
descl = []
for desclist in soup.findAll(name='div',attrs={'class':"trailtext"}):
descl.append(self.tag_to_string(desclist).strip())
t = -1
for tag in soup.findAll('h3'):
t = t+1
for a in tag.findAll('a'):
if t < len(descl):
desc = descl[t]
else:
desc = ''
if a and a.has_key('href'):
url2 = a['href']
else:
url2 =''
div = soup.find('div', attrs={'class':'book-index'})
for ul in div.findAll('ul', attrs={'class':'trailblock'}):
for li in ul.findAll('li'):
a = li.find(href=True)
if not a:
continue
title = self.tag_to_string(a)
url = a['href']
if not title or not url:
continue
tt = li.find('div', attrs={'class':'trailtext'})
if tt is not None:
for da in tt.findAll('a'): da.extract()
desc = self.tag_to_string(tt).strip()
yield {
'title': title, 'url':url, 'description':desc,
'date' : strftime('%a, %d %b'),
}
if len(articles) == 0: #First article
articles.append({
'title':title,
'date':date,
'url':url2,
'description':desc,
})
else:
#eliminate duplicates start
if {'title':title,'date':date,'url':url2,'description':desc} in articles :
url2 = ''
#eliminate duplicates end
else:
if 'http://jobs.guardian.co.uk/' in url2:
url2 = ''
else:
articles.append({
'title':title,
'date':date,
'url':url2,
'description':desc,
})
#find the articles in the Main Section end
ans.append( articles)
else:
url =''
titles = map(self.find_title, sections)
ans1 = list(zip(titles,ans))
return ans1[2:]
def find_title(self, section):
d = {'topstories':'Top Stories', 'international':'International', 'editorialsandreply':'Editorials and Reply',
'commentanddebate':'Comment and Debate','uknews':'UK News','saturday':'Saturday','sunday':'Sunday',
'reviews':'Reviews', 'obituaries':'Obituaries'}
return d.get(section, section)
def parse_index(self):
feeds = []
for title, href in self.find_sections():
feeds.append((title, list(self.find_articles(href))))
return feeds
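# Illustrative note (a sketch, not part of the recipe): parse_index() is expected to
# return a list of (feed title, list of article dicts) pairs, each dict using the keys
# yielded by find_articles() above, for example:
#   [('Top stories', [{'title': 'Some headline', 'url': 'http://www.guardian.co.uk/...',
#                      'description': 'Trail text', 'date': 'Sat, 21 Nov'}])]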
def preprocess_html(self, soup):

View File

@ -0,0 +1,110 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
class HBR(BasicNewsRecipe):
title = 'Harvard Business Review'
description = 'To subscribe go to http://hbr.harvardbusiness.org'
needs_subscription = True
__author__ = 'Kovid Goyal and Sujata Raman'
timefmt = ' [%B %Y]'
language = 'en'
no_stylesheets = True
LOGIN_URL = 'http://hbr.harvardbusiness.org/login?request_url=/'
INDEX = 'http://hbr.harvardbusiness.org/current'
keep_only_tags = [dict(name='div', id='content')]
remove_tags = [
dict(id=['articleDate', 'subscriptionModule', 'errorArea',
'feedbackForm', 'relatedModule', 'articleSecondaryModule',
'contentRight', 'summaryLink']),
dict(name='form'),
]
extra_css = '''
a {font-family:Georgia,"Times New Roman",Times,serif; font-style:italic; color:#000000; }
.article{font-family:Georgia,"Times New Roman",Times,serif; font-size: xx-small;}
h2{font-family:Georgia,"Times New Roman",Times,serif; font-weight:bold; font-size:large; }
h4{font-family:Georgia,"Times New Roman",Times,serif; font-weight:bold; font-size:small; }
#articleAuthors{font-family:Georgia,"Times New Roman",Times,serif; font-style:italic; color:#000000;font-size:x-small;}
#summaryText{font-family:Georgia,"Times New Roman",Times,serif; font-weight:bold; font-size:x-small;}
'''
def get_browser(self):
br = BasicNewsRecipe.get_browser(self)
br.open(self.LOGIN_URL)
br.select_form(nr=0)
br['ssousername'] = self.username
br['password'] = self.password
raw = br.submit().read()
if 'My Account' not in raw:
raise Exception('Failed to login, are you sure your username and password are correct?')
self.logout_url = None
link = br.find_link(text='(sign out)')
if link:
self.logout_url = link.absolute_url
return br
def cleanup(self):
if self.logout_url is not None:
self.browser.open(self.logout_url)
def map_url(self, url):
if url.endswith('/ar/1'):
return url[:-1]+'pr'
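# Sketch of the mapping above with an invented URL: '.../2009/11/some-article/ar/1'
# becomes '.../2009/11/some-article/ar/pr' (presumably the printer-friendly view);
# any other URL falls through and map_url() returns None.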
def get_features(self, soup):
div = soup.find('div', id='issueFeatures')
for li in div.findAll('li'):
a = li.find('a', href=True)
url = 'http://hbr.harvardbusiness.org'+a['href']
url = self.map_url(url)
if not url:
continue
title = self.tag_to_string(a)
p = li.find('p')
desc = ''
if p is not None:
desc = self.tag_to_string(p)
yield {'title':title, 'url':url, 'description':desc}
def get_departments(self, soup):
div = soup.find('div', id='issueDepartmentsContent')
for h4 in div.findAll('h4'):
feed = self.tag_to_string(h4)
articles = []
ul = h4.findNextSibling('ul')
for li in ul.findAll('li'):
a = li.find('a', href=True)
url = 'http://hbr.harvardbusiness.org'+a['href']
url = self.map_url(url)
if not url:
continue
title = self.tag_to_string(a)
p = li.find('p')
desc = ''
if p is not None:
desc = self.tag_to_string(p)
articles.append({'title':title, 'url':url, 'description':desc})
yield [feed, articles]
def parse_index(self):
soup = self.index_to_soup(self.INDEX)
feeds = []
feeds.append(('Features', list(self.get_features(soup))))
feeds.extend(self.get_departments(soup))
return feeds
def get_cover_url(self):
cover_url = None
index = 'http://hbr.harvardbusiness.org/current'
soup = self.index_to_soup(index)
link_item = soup.find('img', alt=re.compile("HBR Cover Image"), src=True)
if link_item:
cover_url = 'http://hbr.harvardbusiness.org' + link_item['src']
return cover_url

View File

@ -12,20 +12,29 @@ from calibre.web.feeds.news import BasicNewsRecipe
class KellogInsight(BasicNewsRecipe):
title = 'Kellog Insight'
__author__ = 'Kovid Goyal'
__author__ = 'Kovid Goyal and Sujata Raman'
description = 'Articles from the Kellog School of Management'
no_stylesheets = True
encoding = 'utf-8'
language = 'en'
oldest_article = 60
remove_tags_before = {'name':'h1'}
remove_tags_after = {'class':'col-two-text'}
keep_only_tags = [dict(name='div', attrs={'id':['print_no_comments']})]
remove_tags = [dict(name='div', attrs={'class':'col-three'})]
feeds = [('Articles',
'http://insight.kellogg.northwestern.edu/index.php/Kellogg/RSS')]
extra_css = '''
h1{font-family:arial; font-size:medium; color:#333333;}
.col-one{font-family:arial; font-size:xx-small;}
.col-two{font-family:arial; font-size:x-small; }
h2{font-family:arial; font-size:small; color:#666666;}
h3{font-family:arial; font-size:small; color:#333333;text-transform: uppercase; font-weight:normal;}
h4{color:#660000;font-family:arial; font-size:x-small;}
.col-two-text{font-family:arial; font-size:x-small; color:#333333;}
'''
feeds = [('Articles', 'http://insight.kellogg.northwestern.edu/index.php/Kellogg/RSS')]
def get_article_url(self, article):
# Get only article not blog links
@ -34,3 +43,11 @@ class KellogInsight(BasicNewsRecipe):
return link
self.log('Skipping non-article', link)
return None
def preprocess_html(self, soup):
for tag in soup.findAll(name=['span']):
tag.nextSibling.name = 'h4'
return soup

View File

@ -14,7 +14,7 @@ class NewScientist(BasicNewsRecipe):
description = 'Science news and science articles from New Scientist.'
language = 'en'
publisher = 'New Scientist'
category = 'science news, science articles, science jobs, drugs, cancer, depression, computer software, sex'
category = 'science news, science articles, science jobs, drugs, cancer, depression, computer software'
delay = 3
oldest_article = 7
max_articles_per_feed = 100

View File

@ -3,50 +3,55 @@ __license__ = 'GPL v3'
'''
philly.com/inquirer/
'''
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
class Philly(BasicNewsRecipe):
title = 'Philadelphia Inquirer'
__author__ = 'RadikalDissent'
__author__ = 'RadikalDissent and Sujata Raman'
language = 'en'
description = 'Daily news from the Philadelphia Inquirer'
no_stylesheets = True
use_embedded_content = False
oldest_article = 1
max_articles_per_feed = 25
extra_css = '''
.byline {font-size: small; color: grey; font-style:italic; }
.lastline {font-size: small; color: grey; font-style:italic;}
.contact {font-size: small; color: grey;}
.contact p {font-size: small; color: grey;}
h1{font-family:verdana,arial,helvetica,sans-serif; font-size: large;}
h2{font-family:verdana,arial,helvetica,sans-serif; font-size: small;}
.body-content{font-family:verdana,arial,helvetica,sans-serif; font-size: small;}
.byline {font-size: small; color: #666666; font-style:italic; }
.lastline {font-size: small; color: #666666; font-style:italic;}
.contact {font-size: small; color: #666666;}
.contact p {font-size: small; color: #666666;}
#photoCaption { font-family:verdana,arial,helvetica,sans-serif; font-size:x-small;}
.photoCaption { font-family:verdana,arial,helvetica,sans-serif; font-size:x-small;}
#photoCredit{ font-family:verdana,arial,helvetica,sans-serif; font-size:x-small; color:#666666;}
.photoCredit{ font-family:verdana,arial,helvetica,sans-serif; font-size:x-small; color:#666666;}
.article_timestamp{font-size:x-small; color:#666666;}
a {font-family:verdana,arial,helvetica,sans-serif; font-size: x-small;}
'''
preprocess_regexps = [(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
[
(r'<body.*<h1>', lambda match: '<body><h1>'),
(r'<font size="2" face="Arial">', lambda match: '<div class="contact"><font class="contact">'),
(r'<font face="Arial" size="2">', lambda match: '<div class="contact"><font class="contact">')
]
]
keep_only_tags = [
dict(name='h1'),
dict(name='p', attrs={'class':['byline','lastline']}),
dict(name='div', attrs={'class':'body-content'}),
dict(name='div', attrs={'class':'story-content'}),
dict(name='div', attrs={'id': 'contentinside'})
]
remove_tags = [
dict(name='hr'),
dict(name='p', attrs={'class':'buzzBadge'}),
dict(name='div', attrs={'class':['linkssubhead','post_balloon','relatedlist','pollquestion','b_sq']}),
dict(name='dl', attrs={'class':'relatedlist'}),
dict(name='div', attrs={'id':['photoNav','sidebar_adholder']}),
dict(name='a', attrs={'class': ['headlineonly','bl']}),
dict(name='img', attrs={'class':'img_noborder'})
]
def print_version(self, url):
return url + '?viewAll=y'
# def print_version(self, url):
# return url + '?viewAll=y'
feeds = [
('Front Page', 'http://www.philly.com/inquirer_front_page.rss'),
('Business', 'http://www.philly.com/inq_business.rss'),
('News', 'http://www.philly.com/inquirer/news/index.rss'),
#('News', 'http://www.philly.com/inquirer/news/index.rss'),
('Nation', 'http://www.philly.com/inq_news_world_us.rss'),
('Local', 'http://www.philly.com/inquirer_local.rss'),
('Health', 'http://www.philly.com/inquirer_health_science.rss'),
@ -54,3 +59,27 @@ class Philly(BasicNewsRecipe):
('Editorial and opinion', 'http://www.philly.com/inq_news_editorial.rss'),
('Sports', 'http://www.philly.com/inquirer_sports.rss')
]
def get_article_url(self, article):
ans = article.link
try:
self.log('Looking for full story link in', ans)
soup = self.index_to_soup(ans)
x = soup.find(text="View All")
if x is not None:
ans = ans + '?viewAll=y'
self.log('Found full story link', ans)
except:
pass
return ans
def postprocess_html(self, soup,first):
for tag in soup.findAll(name='div',attrs={'class':"container_ate_qandatitle"}):
tag.extract()
for tag in soup.findAll(name='br'):
tag.extract()
return soup

View File

@ -9,7 +9,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class Sciencenews(BasicNewsRecipe):
title = u'ScienceNews'
__author__ = u'Darko Miletic'
__author__ = u'Darko Miletic and Sujata Raman'
description = u"Science News is an award-winning weekly newsmagazine covering the most important research in all fields of science. Its 16 pages each week are packed with short, accurate articles that appeal to both general readers and scientists. Published since 1922, the magazine now reaches about 150,000 subscribers and more than 1 million readers. These are the latest News Items from Science News."
oldest_article = 30
language = 'en'
@ -19,11 +19,43 @@ class Sciencenews(BasicNewsRecipe):
use_embedded_content = False
timefmt = ' [%A, %d %B, %Y]'
extra_css = '''
.content_description{font-family:georgia ;font-size:x-large; color:#646464 ; font-weight:bold;}
.content_summary{font-family:georgia ;font-size:small ;color:#585858 ; font-weight:bold;}
.content_authors{font-family:helvetica,arial ;font-size: xx-small ;color:#14487E ;}
.content_edition{font-family:helvetica,arial ;font-size: xx-small ;}
.exclusive{color:#FF0000 ;}
.anonymous{color:#14487E ;}
.content_content{font-family:helvetica,arial ;font-size: x-small ; color:#000000;}
.description{color:#585858;font-family:helvetica,arial ;font-size: xx-small ;}
.credit{color:#A6A6A6;font-family:helvetica,arial ;font-size: xx-small ;}
'''
keep_only_tags = [ dict(name='div', attrs={'id':'column_action'}) ]
remove_tags_after = dict(name='ul', attrs={'id':'content_functions_bottom'})
remove_tags = [
dict(name='ul', attrs={'id':'content_functions_bottom'})
,dict(name='div', attrs={'id':'content_functions_top'})
,dict(name='div', attrs={'id':['content_functions_top','breadcrumb_content']})
,dict(name='img', attrs={'class':'icon'})
,dict(name='div', attrs={'class': 'embiggen'})
]
feeds = [(u"Science News / News Items", u'http://sciencenews.org/view/feed/type/news/name/news.rss')]
def get_cover_url(self):
cover_url = None
index = 'http://www.sciencenews.org/view/home'
soup = self.index_to_soup(index)
link_item = soup.find(name = 'img',alt = "issue")
print link_item
if link_item:
cover_url = 'http://www.sciencenews.org' + link_item['src'] + '.jpg'
return cover_url
def preprocess_html(self, soup):
for tag in soup.findAll(name=['span']):
tag.name = 'div'
return soup

View File

@ -6,51 +6,86 @@ __docformat__ = 'restructuredtext en'
'''
smh.com.au
'''
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
class SMH(BasicNewsRecipe):
title = 'Sydney Morning Herald'
description = 'Business News, World News and Breaking News in Australia'
__author__ = 'Kovid Goyal'
__author__ = 'Kovid Goyal and Sujata Raman'
language = 'en_AU'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
no_javascript = True
timefmt = ' [%A, %d %B, %Y]'
encoding = 'utf-8'
keep_only_tags = [dict(name='div', attrs ={'id':'content'})]
remove_tags = [
dict(name='div', attrs={'align' :'right'}),
dict(name='p', attrs={'class' :'comments'}),
dict(name='a', attrs={'class' :['more-photos','performerpromo']}),
dict(name='img', attrs={'alt' :'aap'}),
dict(name='div', attrs ={'id':['googleAds','moreGoogleAds','comments','footer','sidebar','austereopuff','adSpotIsland']}),
dict(name='div', attrs ={'class':['article-links','wof','articleTools top','cN-multimediaGroup cfix','articleTools bottom']}),
dict(name='div', attrs ={'class':['clear','adSpot-textboxgr1','adSpot-textBox','articleTools-c3 cfix','articleExtras-bottom','span-16 last']}),
dict(name='div', attrs ={'class':[ 'sidebar span-5','cT-socialCommenting','cN-linkList','cN-topicSelector','cT-storyTools cfix','cT-imageMultimedia']}) ,
dict(name='iframe'),
]
extra_css = '''
h1{font-family:Georgia,"Times New Roman",Times,serif; font-weight:bold; font-size:large;}
.cT-storyDetails{font-family:Arial,Helvetica,sans-serif; color:#666666;font-size:x-small;}
.articleBody{font-family:Arial,Helvetica,sans-serif; color:black;font-size:small;}
.cT-imageLandscape{font-family:Arial,Helvetica,sans-serif; color:#333333 ;font-size:x-small;}
.source{font-family:Arial,Helvetica,sans-serif; color:#333333 ;font-size:xx-small;}
#content{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
.pageprint{font-family:Arial,Helvetica,sans-serif;font-size:small;}
#bylineDetails{font-family:Arial,Helvetica,sans-serif; color:#666666;font-size:x-small;}
.featurePic-wide{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
#idfeaturepic{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
h3{font-family:Georgia,"Times New Roman",Times,serif; font-size:small;}
h2{font-family:Georgia,"Times New Roman",Times,serif; font-size:small;}
h4{font-family:Georgia,"Times New Roman",Times,serif; font-size:small;}
h5{font-family:Georgia,"Times New Roman",Times,serif; font-size:small;}
body{font-family:Arial,Helvetica,sans-serif; font-size:x-small;}
'''
feeds = [
('Top Stories', 'http://feeds.smh.com.au/rssheadlines/top.xml'),
('National', 'http://feeds.smh.com.au/rssheadlines/national.xml'),
('World', 'http://feeds.smh.com.au/rssheadlines/world.xml'),
('Business', 'http://www.smh.com.au/rssheadlines/business.xml'),
('National Times', 'http://www.smh.com.au/rssheadlines/opinion/article/rss.xml'),
('Entertainment', 'http://feeds.smh.com.au/rssheadlines/entertainment.xml'),
('Technology', 'http://feeds.smh.com.au/rssheadlines/technology.xml'),
('Sport', 'http://feeds.smh.com.au/rssheadlines/sport.xml'),
]
def preprocess_html(self, soup):
bod = soup.find('bod')
if bod is not None:
bod.tag = 'div'
p = soup.find(id='content')
bod.extract()
p.insert(len(p), bod)
return soup
def get_browser(self):
br = BasicNewsRecipe.get_browser()
br.set_handle_refresh(False)
return br
def parse_index(self):
soup = BeautifulSoup(self.browser.open('http://www.smh.com.au/text/').read())
feeds, articles = [], []
feed = None
def get_article_url(self, article):
url = article.link
if 'media' in url:
url = ''
return url
for tag in soup.findAll(['h3', 'a']):
if tag.name == 'h3':
if articles:
feeds.append((feed, articles))
articles = []
feed = self.tag_to_string(tag)
elif feed is not None and tag.has_key('href') and tag['href'].strip():
url = tag['href'].strip()
if url.startswith('/'):
url = 'http://www.smh.com.au' + url
title = self.tag_to_string(tag)
articles.append({
'title': title,
'url' : url,
'date' : strftime('%a, %d %b'),
'description' : '',
'content' : '',
})
return feeds

View File

@ -2,7 +2,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = 'calibre'
__version__ = '0.6.23'
__version__ = '0.6.24'
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
import re

View File

@ -101,8 +101,6 @@ def metadata_sources(metadata_type='basic', customize=True, isbndb_key=None):
plugin.site_customization = customization.get(plugin.name, None)
if plugin.name == 'IsbnDB' and isbndb_key is not None:
plugin.site_customization = isbndb_key
if not plugin.is_ok():
continue
yield plugin
def get_isbndb_key():

View File

@ -92,3 +92,8 @@ class POCKETBOOK360(EB600):
VENDOR_NAME = 'PHILIPS'
WINDOWS_MAIN_MEM = 'MASS_STORGE'
OSX_MAIN_MEM = 'Philips Mass Storge Media'
OSX_CARD_A_MEM = 'Philips Mass Storge Media'

View File

@ -66,14 +66,24 @@ class USBMS(CLI, Device):
match = fnmatch.filter(files, '*.%s' % (book_type))
for i, filename in enumerate(match):
self.report_progress((i+1) / float(len(match)), _('Getting list of books on device...'))
try:
bl.append(self.__class__.book_from_path(os.path.join(path, filename)))
except: # Probably a filename encoding error
import traceback
traceback.print_exc()
continue
else:
path = os.path.join(prefix, ebook_dir)
paths = os.listdir(path)
for i, filename in enumerate(paths):
self.report_progress((i+1) / float(len(paths)), _('Getting list of books on device...'))
if path_to_ext(filename) in self.FORMATS:
try:
bl.append(self.__class__.book_from_path(os.path.join(path, filename)))
except: # Probably a file name encoding error
import traceback
traceback.print_exc()
continue
self.report_progress(1.0, _('Getting list of books on device...'))

View File

@ -9,9 +9,11 @@ from threading import Thread
from calibre import prints
from calibre.utils.config import OptionParser
from calibre.utils.logging import default_log
from calibre.ebooks.metadata import MetaInformation
from calibre.customize import Plugin
metadata_config = None
class MetadataSource(Plugin):
author = 'Kovid Goyal'
@ -23,11 +25,17 @@ class MetadataSource(Plugin):
#: tags/rating/reviews/etc.
metadata_type = 'basic'
#: If not None, the customization dialog will allow for string
#: based customization as well as the default customization. The
#: string customization will be saved in the site_customization
#: member.
string_customization_help = None
type = _('Metadata download')
def __call__(self, title, author, publisher, isbn, verbose, log=None,
extra=None):
self.worker = Thread(target=self.fetch)
self.worker = Thread(target=self._fetch)
self.worker.daemon = True
self.title = title
self.verbose = verbose
@ -39,23 +47,87 @@ class MetadataSource(Plugin):
self.exception, self.tb, self.results = None, None, []
self.worker.start()
def _fetch(self):
try:
self.fetch()
if self.results:
c = self.config_store().get(self.name, {})
res = self.results
if isinstance(res, MetaInformation):
res = [res]
for mi in res:
if not c.get('rating', True):
mi.rating = None
if not c.get('comments', True):
mi.comments = None
if not c.get('tags', True):
mi.tags = []
except Exception, e:
self.exception = e
self.tb = traceback.format_exc()
def fetch(self):
'''
All the actual work is done here.
'''
raise NotImplementedError
def is_ok(self):
'''
Used to check if the plugin has been correctly customized.
For example: The isbndb plugin checks to see if the site_customization
has been set with an isbndb.com access key.
'''
return True
def join(self):
return self.worker.join()
def is_customizable(self):
return True
def config_store(self):
global metadata_config
if metadata_config is None:
from calibre.utils.config import XMLConfig
metadata_config = XMLConfig('plugins/metadata_download')
return metadata_config
def config_widget(self):
from PyQt4.Qt import QWidget, QVBoxLayout, QLabel, Qt, QLineEdit, \
QCheckBox
from calibre.customize.ui import config
w = QWidget()
w._layout = QVBoxLayout(w)
w.setLayout(w._layout)
if self.string_customization_help is not None:
w._sc_label = QLabel(self.string_customization_help, w)
w._layout.addWidget(w._sc_label)
customization = config['plugin_customization']
def_sc = customization.get(self.name, '')
if not def_sc:
def_sc = ''
w._sc = QLineEdit(def_sc, w)
w._layout.addWidget(w._sc)
w._sc_label.setWordWrap(True)
w._sc_label.setTextInteractionFlags(Qt.LinksAccessibleByMouse
| Qt.LinksAccessibleByKeyboard)
w._sc_label.setOpenExternalLinks(True)
c = self.config_store()
c = c.get(self.name, {})
for x, l in {'rating':_('ratings'), 'tags':_('tags'),
'comments':_('description/reviews')}.items():
cb = QCheckBox(_('Download %s from %s')%(l,
self.name))
setattr(w, '_'+x, cb)
cb.setChecked(c.get(x, True))
w._layout.addWidget(cb)
return w
def save_settings(self, w):
dl_settings = {}
for x in ('rating', 'tags', 'comments'):
dl_settings[x] = getattr(w, '_'+x).isChecked()
c = self.config_store()
c.set(self.name, dl_settings)
if hasattr(w, '_sc'):
sc = unicode(w._sc.text()).strip()
from calibre.customize.ui import customize_plugin
customize_plugin(self, sc)
class GoogleBooks(MetadataSource):
@ -102,14 +174,11 @@ class ISBNDB(MetadataSource):
self.exception = e
self.tb = traceback.format_exc()
def customization_help(self, gui=False):
@property
def string_customization_help(self):
ans = _('To use isbndb.com you must sign up for a %sfree account%s '
'and enter your access key below.')
if gui:
ans = '<p>'+ans%('<a href="http://www.isbndb.com">', '</a>')
else:
ans = ans.replace('%s', '')
return ans
return '<p>'+ans%('<a href="http://www.isbndb.com">', '</a>')
class Amazon(MetadataSource):
@ -191,7 +260,7 @@ def get_social_metadata(mi, verbose=0):
comments.add(dmi.comments)
if ratings:
rating = sum(ratings)/float(len(ratings))
if mi.rating is None:
if mi.rating is None or mi.rating < 0.1:
mi.rating = rating
else:
mi.rating = (mi.rating + rating)/2.0

View File

@ -3,6 +3,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''Read meta information from PDF files'''
import re
from functools import partial
from calibre import prints
@ -11,10 +12,16 @@ from calibre.ebooks.metadata import MetaInformation, string_to_authors, authors_
pdfreflow, pdfreflow_error = plugins['pdfreflow']
_isbn_pat = re.compile(r'ISBN[: ]*([-0-9Xx]+)')
def get_metadata(stream, cover=True):
if pdfreflow is None:
raise RuntimeError(pdfreflow_error)
info = pdfreflow.get_metadata(stream.read(), cover)
raw = stream.read()
isbn = _isbn_pat.search(raw)
if isbn is not None:
isbn = isbn.group(1).replace('-', '').replace(' ', '')
info = pdfreflow.get_metadata(raw, cover)
title = info.get('Title', None)
au = info.get('Author', None)
if au is None:
@ -22,6 +29,8 @@ def get_metadata(stream, cover=True):
else:
au = string_to_authors(au)
mi = MetaInformation(title, au)
if isbn is not None:
mi.isbn = isbn
creator = info.get('Creator', None)
if creator:

View File

@ -777,7 +777,7 @@ class Manifest(object):
# Remove DOCTYPE declaration as it messes up parsing
# Inparticular it causes tostring to insert xmlns
# In particular, it causes tostring to insert xmlns
# declarations, which messes up the coercing logic
idx = data.find('<html')
if idx > -1:
@ -1746,9 +1746,20 @@ class OEBBook(object):
return d.replace('\r\n', '\n').replace('\r', '\n')
if isinstance(data, unicode):
return fix_data(data)
if data[:2] in ('\xff\xfe', '\xfe\xff'):
bom_enc = None
if data[:4] in ('\0\0\xfe\xff', '\xff\xfe\0\0'):
bom_enc = {'\0\0\xfe\xff':'utf-32-be',
'\xff\xfe\0\0':'utf-32-le'}[data[:4]]
data = data[4:]
elif data[:2] in ('\xff\xfe', '\xfe\xff'):
bom_enc = {'\xff\xfe':'utf-16-le', '\xfe\xff':'utf-16-be'}[data[:2]]
data = data[2:]
elif data[:3] == '\xef\xbb\xbf':
bom_enc = 'utf-8'
data = data[3:]
if bom_enc is not None:
try:
return fix_data(data.decode('utf-16'))
return fix_data(data.decode(bom_enc))
except UnicodeDecodeError:
pass
if self.input_encoding is not None:

View File

@ -31,6 +31,13 @@ class BulkConfig(Config):
self.input_label.hide()
self.input_formats.hide()
self.opt_individual_saved_settings.setVisible(True)
self.opt_individual_saved_settings.setChecked(True)
self.opt_individual_saved_settings.setToolTip(_('For '
'settings that cannot be specified in this dialog, use the '
'values saved in a previous conversion (if they exist) instead '
'of using the defaults specified in the Preferences'))
self.connect(self.output_formats, SIGNAL('currentIndexChanged(QString)'),
self.setup_pipeline)

View File

@ -116,6 +116,7 @@ class Config(ResizableDialog, Ui_Dialog):
def __init__(self, parent, db, book_id,
preferred_input_format=None, preferred_output_format=None):
ResizableDialog.__init__(self, parent)
self.opt_individual_saved_settings.setVisible(False)
self.db, self.book_id = db, book_id
self.setup_input_output_formats(self.db, self.book_id, preferred_input_format,

View File

@ -33,6 +33,13 @@
<item>
<widget class="QComboBox" name="input_formats"/>
</item>
<item>
<widget class="QCheckBox" name="opt_individual_saved_settings">
<property name="text">
<string>Use &amp;saved conversion settings for individual books</string>
</property>
</widget>
</item>
<item>
<spacer name="horizontalSpacer">
<property name="orientation">
@ -109,7 +116,7 @@
<x>0</x>
<y>0</y>
<width>810</width>
<height>492</height>
<height>489</height>
</rect>
</property>
<layout class="QVBoxLayout" name="verticalLayout_3">

View File

@ -6,7 +6,6 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
from PyQt4.QtCore import SIGNAL, QObject
from PyQt4.QtGui import QDialog
from calibre.gui2 import qstring_to_unicode
from calibre.gui2.dialogs.metadata_bulk_ui import Ui_MetadataBulkDialog
from calibre.gui2.dialogs.tag_editor import TagEditor
from calibre.ebooks.metadata import string_to_authors, authors_to_sort_string, \
@ -86,7 +85,7 @@ class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog):
def sync(self):
for id in self.ids:
au = qstring_to_unicode(self.authors.text())
au = unicode(self.authors.text())
if au:
au = string_to_authors(au)
self.db.set_authors(id, au, notify=False)
@ -97,28 +96,39 @@ class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog):
x = authors_to_sort_string(aut)
if x:
self.db.set_author_sort(id, x, notify=False)
aus = qstring_to_unicode(self.author_sort.text())
aus = unicode(self.author_sort.text())
if aus and self.author_sort.isEnabled():
self.db.set_author_sort(id, aus, notify=False)
if self.write_rating:
self.db.set_rating(id, 2*self.rating.value(), notify=False)
pub = qstring_to_unicode(self.publisher.text())
pub = unicode(self.publisher.text())
if pub:
self.db.set_publisher(id, pub, notify=False)
remove_tags = qstring_to_unicode(self.remove_tags.text()).strip()
remove_tags = unicode(self.remove_tags.text()).strip()
if remove_tags:
remove_tags = [i.strip() for i in remove_tags.split(',')]
self.db.unapply_tags(id, remove_tags, notify=False)
tags = qstring_to_unicode(self.tags.text()).strip()
tags = unicode(self.tags.text()).strip()
if tags:
tags = map(lambda x: x.strip(), tags.split(','))
self.db.set_tags(id, tags, append=True, notify=False)
if self.write_series:
self.db.set_series(id, qstring_to_unicode(self.series.currentText()), notify=False)
self.db.set_series(id, unicode(self.series.currentText()), notify=False)
if self.remove_format.currentIndex() > -1:
self.db.remove_format(id, unicode(self.remove_format.currentText()), index_is_id=True, notify=False)
if self.swap_title_and_author.isChecked():
title = self.db.title(id, index_is_id=True)
aum = self.db.authors(id, index_is_id=True)
if aum:
aum = [a.strip().replace('|', ',') for a in aum.split(',')]
new_title = authors_to_string(aum)
self.db.set_title(id, new_title)
if title:
new_authors = string_to_authors(title)
self.db.set_authors(id, new_authors)
self.changed = True
def series_changed(self):

View File

@ -7,7 +7,7 @@
<x>0</x>
<y>0</y>
<width>495</width>
<height>387</height>
<height>456</height>
</rect>
</property>
<property name="windowTitle">
@ -230,6 +230,13 @@
</property>
</widget>
</item>
<item row="9" column="0" colspan="2">
<widget class="QCheckBox" name="swap_title_and_author">
<property name="text">
<string>&amp;Swap title and author</string>
</property>
</widget>
</item>
</layout>
</widget>
</item>

View File

@ -552,6 +552,8 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
warning_dialog(self, _('There were errors'),
_('There were errors downloading social metadata'),
det_msg=det, show=True)
else:
book.tags = []
self.title.setText(book.title)
self.authors.setText(authors_to_string(book.authors))
if book.author_sort: self.author_sort.setText(book.author_sort)

View File

@ -68,6 +68,7 @@ class LibraryDelegate(QItemDelegate):
self.drawFocus(painter, option, option.rect)
try:
painter.setRenderHint(QPainter.Antialiasing)
painter.setClipRect(option.rect)
y = option.rect.center().y()-self.SIZE/2.
x = option.rect.right() - self.SIZE
painter.setPen(self.PEN)

View File

@ -213,19 +213,18 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
self.device_manager.umount_device)
####################### Vanity ########################
self.vanity_template = _('<p>For help visit <a href="http://%s.'
'kovidgoyal.net/user_manual">%s.kovidgoyal.net</a>'
'<br>')%(__appname__, __appname__)
self.vanity_template = _('<p>For help see the: <a href="%s">User Manual</a>'
'<br>')%'http://calibre.kovidgoyal.net/user_manual'
self.vanity_template += _('<b>%s</b>: %s by <b>Kovid Goyal '
'%%(version)s</b><br>%%(device)s</p>')%(__appname__, __version__)
self.latest_version = ' '
self.vanity.setText(self.vanity_template%dict(version=' ', device=' '))
self.device_info = ' '
if not opts.no_update_check:
self.update_checker = CheckForUpdates()
self.update_checker = CheckForUpdates(self)
QObject.connect(self.update_checker,
SIGNAL('update_found(PyQt_PyObject)'), self.update_found)
self.update_checker.start()
self.update_checker.start(2000)
####################### Status Bar #####################
self.status_bar = StatusBar(self.jobs_dialog, self.system_tray_icon)
self.setStatusBar(self.status_bar)
@ -246,6 +245,7 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
md.addAction(_('Download metadata and covers'))
md.addAction(_('Download only metadata'))
md.addAction(_('Download only covers'))
md.addAction(_('Download only social metadata'))
self.metadata_menu = md
self.add_menu = QMenu()
self.add_menu.addAction(_('Add books from a single directory'))
@ -288,7 +288,10 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
set_metadata=False)
QObject.connect(md.actions()[6], SIGNAL('triggered(bool)'),
self.__em5__)
self.__em6__ = partial(self.download_metadata, covers=False,
set_metadata=False, set_social_metadata=True)
QObject.connect(md.actions()[7], SIGNAL('triggered(bool)'),
self.__em6__)
self.save_menu = QMenu()
@ -1027,7 +1030,8 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
############################### Edit metadata ##############################
def download_metadata(self, checked, covers=True, set_metadata=True):
def download_metadata(self, checked, covers=True, set_metadata=True,
set_social_metadata=None):
rows = self.library_view.selectionModel().selectedRows()
previous = self.library_view.currentIndex()
if not rows or len(rows) == 0:
@ -1037,11 +1041,18 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
return
db = self.library_view.model().db
ids = [db.id(row.row()) for row in rows]
if set_social_metadata is None:
get_social_metadata = config['get_social_metadata']
else:
get_social_metadata = set_social_metadata
from calibre.gui2.metadata import DownloadMetadata
self._download_book_metadata = DownloadMetadata(db, ids,
get_covers=covers, set_metadata=set_metadata,
get_social_metadata=config['get_social_metadata'])
get_social_metadata=get_social_metadata)
self._download_book_metadata.start()
if set_social_metadata is not None and set_social_metadata:
x = _('social metadata')
else:
x = _('covers') if covers and not set_metadata else _('metadata')
self.progress_indicator.start(
_('Downloading %s for %d book(s)')%(x, len(ids)))
@ -1744,6 +1755,7 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
if write_settings:
self.write_settings()
self.check_messages_timer.stop()
self.update_checker.stop()
self.listener.close()
self.job_manager.server.close()
while self.spare_servers:

View File

@ -60,6 +60,7 @@ class DownloadMetadata(Thread):
self.worker = Worker()
for id in ids:
self.metadata[id] = db.get_metadata(id, index_is_id=True)
self.metadata[id].rating = None
def run(self):
self.exception = self.tb = None
@ -100,15 +101,28 @@ class DownloadMetadata(Thread):
mi.smart_update(fmi)
if mi.isbn and self.get_social_metadata:
self.social_metadata_exceptions = get_social_metadata(mi)
if mi.rating:
mi.rating *= 2
if not self.get_social_metadata:
mi.tags = []
else:
self.failures[id] = (mi.title,
_('No matches found for this book'))
self.commit_covers()
self.commit_covers(True)
if self.set_metadata:
for id in self.fetched_metadata:
self.db.set_metadata(id, self.metadata[id])
mi = self.metadata[id]
if self.set_metadata:
self.db.set_metadata(id, mi)
if not self.set_metadata and self.get_social_metadata:
if mi.rating:
self.db.set_rating(id, mi.rating)
if mi.tags:
self.db.set_tags(id, mi.tags)
if mi.comments:
self.db.set_comment(id, mi.comments)
self.updated = set(self.fetched_metadata)

View File

@ -47,7 +47,10 @@ class TagsView(QTreeView):
ci = self.currentIndex()
if not ci.isValid():
ci = self.indexAt(QPoint(10, 10))
try:
self.model().refresh()
except: #Database connection could be closed if an integrity check is happening
pass
if ci.isValid():
self.scrollTo(ci, QTreeView.PositionAtTop)

View File

@ -111,17 +111,21 @@ def convert_bulk_ebook(parent, queue, db, book_ids, out_format=None, args=[]):
user_recs = cPickle.loads(d.recommendations)
book_ids = convert_existing(parent, db, book_ids, output_format)
return QueueBulk(parent, book_ids, output_format, queue, db, user_recs, args)
use_saved_single_settings = d.opt_individual_saved_settings.isChecked()
return QueueBulk(parent, book_ids, output_format, queue, db, user_recs,
args, use_saved_single_settings=use_saved_single_settings)
class QueueBulk(QProgressDialog):
def __init__(self, parent, book_ids, output_format, queue, db, user_recs, args):
def __init__(self, parent, book_ids, output_format, queue, db, user_recs,
args, use_saved_single_settings=True):
QProgressDialog.__init__(self, '',
QString(), 0, len(book_ids), parent)
self.setWindowTitle(_('Queueing books for bulk conversion'))
self.book_ids, self.output_format, self.queue, self.db, self.args, self.user_recs = \
book_ids, output_format, queue, db, args, user_recs
self.parent = parent
self.use_saved_single_settings = use_saved_single_settings
self.i, self.bad, self.jobs, self.changed = 0, [], [], False
self.timer = QTimer(self)
self.connect(self.timer, SIGNAL('timeout()'), self.do_book)
@ -149,9 +153,10 @@ class QueueBulk(QProgressDialog):
combined_recs = GuiRecommendations()
default_recs = load_defaults('%s_input' % input_format)
specific_recs = load_specifics(self.db, book_id)
for key in default_recs:
combined_recs[key] = default_recs[key]
if self.use_saved_single_settings:
specific_recs = load_specifics(self.db, book_id)
for key in specific_recs:
combined_recs[key] = specific_recs[key]
for item in self.user_recs:

View File

@ -3,7 +3,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import traceback
from PyQt4.QtCore import QThread, SIGNAL
from PyQt4.QtCore import QObject, SIGNAL, QTimer
import mechanize
from calibre.constants import __version__, iswindows, isosx
@ -11,9 +11,21 @@ from calibre import browser
URL = 'http://status.calibre-ebook.com/latest'
class CheckForUpdates(QThread):
class CheckForUpdates(QObject):
def __init__(self, parent):
QObject.__init__(self, parent)
self.timer = QTimer(self)
self.first = True
self.connect(self.timer, SIGNAL('timeout()'), self)
self.start = self.timer.start
self.stop = self.timer.stop
def __call__(self):
if self.first:
self.timer.setInterval(1000*24*60*60)
self.first = False
def run(self):
try:
br = browser()
req = mechanize.Request(URL)

View File

@ -9,7 +9,6 @@ Command line interface to the calibre database.
import sys, os, cStringIO
from textwrap import TextWrapper
from urllib import quote
from calibre import terminal_controller, preferred_encoding, prints
from calibre.utils.config import OptionParser, prefs
@ -48,10 +47,10 @@ XML_TEMPLATE = '''\
<comments>${record['comments']}</comments>
<series py:if="record['series']" index="${record['series_index']}">${record['series']}</series>
<isbn>${record['isbn']}</isbn>
<cover py:if="record['cover']">${record['cover']}</cover>
<cover py:if="record['cover']">${record['cover'].replace(os.sep, '/')}</cover>
<formats py:if="record['formats']">
<py:for each="path in record['formats']">
<format>${path}</format>
<format>${path.replace(os.sep, '/')}</format>
</py:for>
</formats>
</record>
@ -78,9 +77,9 @@ STANZA_TEMPLATE='''\
<id>urn:calibre:${record['uuid']}</id>
<author><name>${record['author_sort']}</name></author>
<updated>${record['timestamp'].strftime('%Y-%m-%dT%H:%M:%SZ')}</updated>
<link type="application/epub+zip" href="${quote(record['fmt_epub'].replace(sep, '/')).replace('http%3A', 'http:')}" />
<link py:if="record['cover']" rel="x-stanza-cover-image" type="image/png" href="${quote(record['cover'].replace(sep, '/')).replace('http%3A', 'http:')}" />
<link py:if="record['cover']" rel="x-stanza-cover-image-thumbnail" type="image/png" href="${quote(record['cover'].replace(sep, '/')).replace('http%3A', 'http:')}" />
<link type="application/epub+zip" href="${quote(record['fmt_epub'].replace(sep, '/'))}"/>
<link py:if="record['cover']" rel="x-stanza-cover-image" type="image/png" href="${quote(record['cover'].replace(sep, '/'))}"/>
<link py:if="record['cover']" rel="x-stanza-cover-image-thumbnail" type="image/png" href="${quote(record['cover'].replace(sep, '/'))}"/>
<content type="xhtml">
<div xmlns="http://www.w3.org/1999/xhtml">
<py:for each="f in ('authors', 'publisher', 'rating', 'tags', 'series', 'isbn')">
@ -186,8 +185,10 @@ def do_list(db, fields, sort_by, ascending, search_text, line_width, separator,
return o.getvalue()
elif output_format == 'xml':
template = MarkupTemplate(XML_TEMPLATE)
return template.generate(data=data).render('xml')
return template.generate(data=data, os=os).render('xml')
elif output_format == 'stanza':
def quote(raw):
return raw.replace('"', r'\"')
data = [i for i in data if i.has_key('fmt_epub')]
for x in data:
if isinstance(x['fmt_epub'], unicode):

View File

@ -115,7 +115,7 @@ class PostInstall:
self.info('Creating symlinks...')
for exe in scripts.keys():
dest = os.path.join(self.opts.staging_bindir, exe)
if os.path.exists(dest):
if os.path.lexists(dest):
os.unlink(dest)
tgt = os.path.join(getattr(sys, 'frozen_path'), exe)
self.info('\tSymlinking %s to %s'%(tgt, dest))

View File

@ -421,7 +421,7 @@ button in the individual book conversion dialog.
When you Bulk Convert a set of books, settings are taken in the following order:
* From the defaults set in Preferences->Conversion
* From the saved conversion settings for each book being converted (if any)
* From the saved conversion settings for each book being converted (if any). This can be turned off by the option in the top left corner of the Bulk Conversion dialog.
* From the settings set in the Bulk conversion dialog
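A minimal sketch of that precedence (illustrative only; the variable names below are invented, not calibre's):
combined = dict(preference_defaults)           # Preferences->Conversion defaults
if use_saved_per_book_settings:                # the Bulk Conversion dialog option
    combined.update(saved_per_book_settings)   # previous conversion of this book, if any
combined.update(bulk_dialog_settings)          # values set in the Bulk Conversion dialog win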
Note that the final settings for each book in a Bulk Conversion will be saved and re-used if the book is converted again. Since the

View File

@ -81,7 +81,7 @@ Device Integration
What devices does |app| support?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
At the moment |app| has full support for the SONY PRS 300/500/505/600/700, Cybook Gen 3/Opus, Amazon Kindle 1/2/DX, Netronix EB600, Ectaco Jetbook, BeBook/BeBook Mini, Irex Illiad/DR1000, Foxit eSlick, Android phones and the iPhone. In addition, using the :guilabel:`Save to disk` function you can use it with any ebook reader that exports itself as a USB disk.
At the moment |app| has full support for the SONY PRS 300/500/505/600/700, Cybook Gen 3/Opus, Amazon Kindle 1/2/DX, Netronix EB600, Ectaco Jetbook, BeBook/BeBook Mini, Irex Illiad/DR1000, Foxit eSlick, PocketBook 360, Android phones and the iPhone. In addition, using the :guilabel:`Save to disk` function you can use it with any ebook reader that exports itself as a USB disk.
How can I help get my device supported in |app|?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

View File

@ -108,7 +108,7 @@ Metadata download plugins
.. class:: calibre.ebooks.metadata.fetch.MetadataSource
Represents a source to query for metadata. Subclasses must implement
at least the fetch method and optionally the is_ok method.
at least the fetch method.
When :meth:`fetch` is called, the `self` object will have the following
useful attributes (each of which may be None)::
@ -124,8 +124,9 @@ Metadata download plugins
.. automember:: calibre.ebooks.metadata.fetch.MetadataSource.metadata_type
.. automember:: calibre.ebooks.metadata.fetch.MetadataSource.string_customization_help
.. automethod:: calibre.ebooks.metadata.fetch.MetadataSource.fetch
.. automethod:: calibre.ebooks.metadata.fetch.MetadataSource.is_ok
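As a rough illustration of the API described above (a sketch only; the plugin name and the fabricated result are invented, not an actual calibre plugin):
from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata.fetch import MetadataSource
class ExampleSource(MetadataSource):
    name = 'Example metadata source'
    metadata_type = 'basic'
    def fetch(self):
        # Attributes such as self.title and self.isbn are set by __call__ and may be None.
        # A real plugin would query a web service here; this sketch just fabricates a result.
        mi = MetaInformation(self.title or 'Unknown', ['Unknown'])
        self.results = [mi]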

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -6,7 +6,7 @@ __docformat__ = 'restructuredtext en'
'''
Manage application-wide preferences.
'''
import os, re, cPickle, textwrap, traceback
import os, re, cPickle, textwrap, traceback, plistlib
from copy import deepcopy
from functools import partial
from optparse import OptionParser as _OptionParser
@ -34,9 +34,11 @@ else:
plugin_dir = os.path.join(config_dir, 'plugins')
CONFIG_DIR_MODE = 0700
def make_config_dir():
if not os.path.exists(plugin_dir):
os.makedirs(plugin_dir, mode=448) # 0700 == 448
os.makedirs(plugin_dir, mode=CONFIG_DIR_MODE)
def check_config_write_access():
return os.access(config_dir, os.W_OK) and os.access(config_dir, os.X_OK)
@ -552,6 +554,72 @@ class DynamicConfig(dict):
dynamic = DynamicConfig()
class XMLConfig(dict):
'''
Similar to :class:`DynamicConfig`, except that it uses an XML storage
backend instead of a pickle file.
See `http://docs.python.org/dev/library/plistlib.html`_ for the supported
data types.
'''
def __init__(self, rel_path_to_cf_file):
dict.__init__(self)
self.file_path = os.path.join(config_dir,
*(rel_path_to_cf_file.split('/')))
self.file_path = os.path.abspath(self.file_path)
if not self.file_path.endswith('.plist'):
self.file_path += '.plist'
self.refresh()
def refresh(self):
d = {}
if os.path.exists(self.file_path):
with ExclusiveFile(self.file_path) as f:
raw = f.read()
try:
d = plistlib.readPlistFromString(raw) if raw.strip() else {}
except SystemError:
pass
except:
import traceback
traceback.print_exc()
d = {}
self.clear()
self.update(d)
def __getitem__(self, key):
try:
ans = dict.__getitem__(self, key)
if isinstance(ans, plistlib.Data):
ans = ans.data
return ans
except KeyError:
return None
def __setitem__(self, key, val):
if isinstance(val, (bytes, str)):
val = plistlib.Data(val)
dict.__setitem__(self, key, val)
self.commit()
def set(self, key, val):
self.__setitem__(key, val)
def commit(self):
if hasattr(self, 'file_path') and self.file_path:
dpath = os.path.dirname(self.file_path)
if not os.path.exists(dpath):
os.makedirs(dpath, mode=CONFIG_DIR_MODE)
with ExclusiveFile(self.file_path) as f:
raw = plistlib.writePlistToString(self)
f.seek(0)
f.truncate()
f.write(raw)
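# Hypothetical usage sketch for XMLConfig (not part of this commit; the key names are
# invented). Values are persisted as a .plist file under the calibre config directory:
#
#   from calibre.utils.config import XMLConfig
#   c = XMLConfig('plugins/metadata_download')   # -> <config_dir>/plugins/metadata_download.plist
#   c['Example plugin'] = {'rating': True, 'tags': False}   # __setitem__ commits to disk
#   opts = c.get('Example plugin', {})           # plain dict.get; a missing key via [] returns None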
def _prefs():
c = Config('global', 'calibre wide preferences')
c.add_opt('database_path',

View File

@ -56,6 +56,8 @@ def _quoteattr(data, entities={}):
the optional entities parameter. The keys and values must all be
strings; each key will be replaced with its corresponding value.
"""
entities['\n']='&#10;'
entities['\r']='&#12;'
data = _escape(data, entities)
if '"' in data:
if "'" in data:

View File

@ -17,7 +17,7 @@
#
# Contributor(s):
#
TOOLSVERSION = u"ODFPY/0.9.1dev"
TOOLSVERSION = u"ODFPY/0.9.2dev"
ANIMNS = u"urn:oasis:names:tc:opendocument:xmlns:animation:1.0"
DBNS = u"urn:oasis:names:tc:opendocument:xmlns:database:1.0"

View File

@ -185,7 +185,7 @@ class OpenDocument:
if self.fontfacedecls.hasChildNodes():
self.fontfacedecls.toXml(1, xml)
a = AutomaticStyles()
stylelist = self._used_auto_styles([self.styles, self.body])
stylelist = self._used_auto_styles([self.styles, self.automaticstyles, self.body])
if len(stylelist) > 0:
a.write_open_tag(1, xml)
for s in stylelist:
@ -233,9 +233,11 @@ class OpenDocument:
for styleref in ( (DRAWNS,u'style-name'),
(DRAWNS,u'text-style-name'),
(PRESENTATIONNS,u'style-name'),
(STYLENS,u'style-name'),
(STYLENS,u'data-style-name'),
(STYLENS,u'list-style-name'),
(STYLENS,u'page-layout-name'),
(STYLENS,u'style-name'),
(TABLENS,u'default-cell-style-name'),
(TABLENS,u'style-name'),
(TEXTNS,u'style-name') ):
if e.getAttrNS(styleref[0],styleref[1]):

View File

@ -50,3 +50,5 @@ def Radialgradient(**args):
def Stop(**args):
return Element(qname = (SVGNS,'stop'), **args)
def Title(**args):
return Element(qname = (SVGNS,'title'), **args)

View File

@ -446,6 +446,9 @@ def SequenceRef(**args):
def SheetName(**args):
return Element(qname = (TEXTNS,'sheet-name'), **args)
def SoftPageBreak(**args):
return Element(qname = (TEXTNS,'soft-page-break'), **args)
def SortKey(**args):
return Element(qname = (TEXTNS,'sort-key'), **args)

View File

@ -1,6 +1,6 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (C) 2006-2007 Søren Roug, European Environment Agency
# Copyright (C) 2006-2009 Søren Roug, European Environment Agency
#
# This is free software. You may redistribute it under the terms
# of the Apache license and the GNU General Public License Version
@ -22,16 +22,11 @@
"""Class to show and manipulate user fields in odf documents."""
import sys
import time
import zipfile
import xml.sax
import xml.sax.handler
import xml.sax.saxutils
from odf.namespaces import OFFICENS, TEXTNS
from cStringIO import StringIO
from odf.text import UserFieldDecl
from odf.namespaces import OFFICENS
from odf.opendocument import load
OUTENCODING = "utf-8"
@ -64,6 +59,26 @@ class UserFields(object):
"""
self.src_file = src
self.dest_file = dest
self.document = None
def loaddoc(self):
if isinstance(self.src_file, basestring):
# src_file is a filename, check if it is a zip-file
if not zipfile.is_zipfile(self.src_file):
raise TypeError("%s is no odt file." % self.src_file)
elif self.src_file is None:
# use stdin if no file given
self.src_file = sys.stdin
self.document = load(self.src_file)
def savedoc(self):
# write output
if self.dest_file is None:
# use stdout if no filename given
self.document.save('-')
else:
self.document.save(self.dest_file)
def list_fields(self):
"""List (extract) all known user-fields.
@ -81,15 +96,21 @@ class UserFields(object):
Returns list of tuples (<field name>, <field type>, <value>).
"""
self.loaddoc()
found_fields = []
def _callback(field_name, value_type, value, attrs):
all_fields = self.document.getElementsByType(UserFieldDecl)
for f in all_fields:
value_type = f.getAttribute('valuetype')
if value_type == 'string':
value = f.getAttribute('stringvalue')
else:
value = f.getAttribute('value')
field_name = f.getAttribute('name')
if field_names is None or field_name in field_names:
found_fields.append((field_name.encode(OUTENCODING),
value_type.encode(OUTENCODING),
value.encode(OUTENCODING)))
return attrs
self._content_handler(_callback)
return found_fields
def list_values(self, field_names):
@ -133,199 +154,16 @@ class UserFields(object):
Returns None
"""
def _callback(field_name, value_type, value, attrs):
if field_name in data:
valattr = VALUE_TYPES.get(value_type)
attrs = dict(attrs.items())
# Take advantage that startElementNS can take a normal
# dict as attrs
attrs[valattr] = data[field_name]
return attrs
self._content_handler(_callback, write_file=True)
def _content_handler(self, callback_func, write_file=False):
"""Handle the content using the callback function and write result if
necessary.
callback_func ... function called for each field found in odf document
signature: field_name ... name of current field
value_type ... type of current field
value ... value of current field
attrs ... tuple of attrs of current field
returns: tuple or dict of attrs
write_file ... boolean telling wether write result to file
"""
class DevNull(object):
"""IO-object which behaves like /dev/null."""
def write(self, str):
pass
# get input
if isinstance(self.src_file, basestring):
# src_file is a filename, check if it is a zip-file
if not zipfile.is_zipfile(self.src_file):
raise TypeError("%s is no odt file." % self.src_file)
elif self.src_file is None:
# use stdin if no file given
self.src_file = sys.stdin
zin = zipfile.ZipFile(self.src_file, 'r')
content_xml = zin.read('content.xml')
# prepare output
if write_file:
output_io = StringIO()
if self.dest_file is None:
# use stdout if no filename given
self.dest_file = sys.stdout
zout = zipfile.ZipFile(self.dest_file, 'w')
else:
output_io = DevNull()
# parse input
odfs = ODFContentParser(callback_func, output_io)
parser = xml.sax.make_parser()
parser.setFeature(xml.sax.handler.feature_namespaces, 1)
parser.setContentHandler(odfs)
parser.parse(StringIO(content_xml))
# write output
if write_file:
# Loop through the input zipfile and copy the content to
# the output until we get to the content.xml. Then
# substitute.
for zinfo in zin.infolist():
if zinfo.filename == "content.xml":
# Write meta
zi = zipfile.ZipInfo("content.xml", time.localtime()[:6])
zi.compress_type = zipfile.ZIP_DEFLATED
zout.writestr(zi, odfs.content())
else:
payload = zin.read(zinfo.filename)
zout.writestr(zinfo, payload)
zout.close()
zin.close()
class ODFContentParser(xml.sax.saxutils.XMLGenerator):
def __init__(self, callback_func, out=None, encoding=OUTENCODING):
"""Constructor.
callback_func ... function called for each field found in odf document
signature: field_name ... name of current field
value_type ... type of current field
value ... value of current field
attrs ... tuple of attrs of current field
returns: tuple or dict of attrs
out ... file like object for output
encoding ... encoding for output
"""
self._callback_func = callback_func
xml.sax.saxutils.XMLGenerator.__init__(self, out, encoding)
def _qname(self, name):
"""Builds a qualified name from a (ns_url, localname) pair"""
if name[0]:
if name[0] == u'http://www.w3.org/XML/1998/namespace':
return u'xml' + ":" + name[1]
# The name is in a non-empty namespace
prefix = self._current_context[name[0]]
if prefix:
# If it is not the default namespace, prepend the prefix
return prefix + ":" + name[1]
# Return the unqualified name
return name[1]
def startElementNS(self, name, qname, attrs):
if name == (TEXTNS, u'user-field-decl'):
field_name = attrs.get((TEXTNS, u'name'))
value_type = attrs.get((OFFICENS, u'value-type'))
self.loaddoc()
all_fields = self.document.getElementsByType(UserFieldDecl)
for f in all_fields:
field_name = f.getAttribute('name')
if data.has_key(field_name):
value_type = f.getAttribute('valuetype')
value = data.get(field_name)
if value_type == 'string':
value = attrs.get((OFFICENS, u'string-value'))
f.setAttribute('stringvalue', value)
else:
value = attrs.get((OFFICENS, u'value'))
f.setAttribute('value', value)
self.savedoc()
attrs = self._callback_func(field_name, value_type, value, attrs)
self._startElementNS(name, qname, attrs)
def _startElementNS(self, name, qname, attrs):
# copy of xml.sax.saxutils.XMLGenerator.startElementNS
# necessary because we have to provide our own writeattr
# function which is called by this method
if name[0] is None:
name = name[1]
elif self._current_context[name[0]] is None:
# default namespace
name = name[1]
else:
name = self._current_context[name[0]] + ":" + name[1]
self._out.write('<' + name)
for k,v in self._undeclared_ns_maps:
if k is None:
self._out.write(' xmlns="%s"' % (v or ''))
else:
self._out.write(' xmlns:%s="%s"' % (k,v))
self._undeclared_ns_maps = []
for (name, value) in attrs.items():
if name[0] is None:
name = name[1]
elif self._current_context[name[0]] is None:
# default namespace
#If an attribute has a nsuri but not a prefix, we must
#create a prefix and add a nsdecl
prefix = self.GENERATED_PREFIX % self._generated_prefix_ctr
self._generated_prefix_ctr = self._generated_prefix_ctr + 1
name = prefix + ':' + name[1]
self._out.write(' xmlns:%s=%s' % (prefix, quoteattr(name[0])))
self._current_context[name[0]] = prefix
else:
name = self._current_context[name[0]] + ":" + name[1]
self._out.write(' %s=' % name)
writeattr(self._out, value)
self._out.write('>')
def content(self):
return self._out.getvalue()
ATTR_ENTITIES = {
'\n': '&#x0a;' # convert newlines into entities inside attributes
}
def writetext(stream, text, entities={}):
text = xml.sax.saxutils.escape(text, entities)
try:
stream.write(text)
except UnicodeError:
for c in text:
try:
stream.write(c)
except UnicodeError:
stream.write(u"&#%d;" % ord(c))
def writeattr(stream, text):
# copied from xml.sax.saxutils.writeattr added support for an
# additional entity mapping
countdouble = text.count('"')
entities = ATTR_ENTITIES.copy()
if countdouble:
countsingle = text.count("'")
if countdouble <= countsingle:
entities['"'] = "&quot;"
quote = '"'
else:
entities["'"] = "&apos;"
quote = "'"
else:
quote = '"'
stream.write(quote)
writetext(stream, text, entities)
stream.write(quote)