Sync to trunk.

John Schember 2009-11-21 21:22:28 -05:00
commit d96542418a
52 changed files with 5724 additions and 4549 deletions

View File

@ -0,0 +1,87 @@
#!/usr/bin/python
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
class FokkeEnSukkeRecipe(BasicNewsRecipe) :
__license__ = 'GPL v3'
__author__ = 'kwetal'
language = 'nl'
description = u'Popular Dutch daily cartoon Fokke en Sukke'
title = u'Fokke en Sukke'
no_stylesheets = True
# For reasons unknown to me the extra css is, on the cartoon pages, inserted in the <body> and not in the <head>. My reader (Sony PRS-600) has a serious issue
# with that: it treats it as content and displays it as is. Setting this property to empty solves this for me.
template_css = ''
INDEX = u'http://foksuk.nl'
# This cover is not as nice as it could be, needs some work
#cover_url = 'http://foksuk.nl/content/wysiwyg/simpleimages/image350.gif'
keep_only_tags = [dict(name='div', attrs={'class' : 'cartoon'})]
def parse_index(self) :
# A list with daynames as they _can_ appear in the index
dayNames = ['maandag', 'dinsdag', 'woensdag', 'donderdag', 'vrijdag', 'zaterdag & zondag']
soup = self.index_to_soup(self.INDEX)
# Find the links for the various cartoons for this week and loop through them
index = soup.find('div', attrs={'class' : 'selectcartoon'})
links = index.findAll('a')
maxIndex = len(links) - 1
articles = []
for i in range(len(links)) :
# The first link does not interest us, as it points to no cartoon, so skip it (range(1, len(links)) would avoid this check).
if i == 0 :
continue
# There can be more than one cartoon for a given day (currently either one or two). If there's only one, there is just a link with the dayname.
# If there are two, there are three links in sequence: <a>dayname</a> <a>1</a> <a>2</a>. In that case we're interested in the last two.
if links[i].renderContents() in dayNames :
# If the link is not in daynames, we processed it already, but if it is, let's see if the next one has '1' as content
if (i + 1 <= maxIndex) and (links[i + 1].renderContents() == '1') :
# Got you! Add it to the list
article = {'title' : links[i].renderContents() + ' 1', 'date' : u'', 'url' : self.INDEX + links[i + 1]['href'], 'description' : ''}
articles.append(article)
# If there is a '1', there should be a '2' as well, but better safe than sorry
if (i + 2 <= maxIndex) and (links[i + 2].renderContents() == '2') :
# Got you! Add it to the list
article = {'title' : links[i].renderContents() + ' 2', 'date' : u'', 'url' : self.INDEX + links[i + 2]['href'], 'description' : ''}
articles.append(article)
else :
# There is only one cartoon for this day. Add it to the list.
article = {'title' : links[i].renderContents(), 'date' : u'', 'url' : self.INDEX + links[i]['href'], 'description' : ''}
articles.append(article)
# Might as well use the week number as the title
week = index.find('span', attrs={'class' : 'week'}).renderContents()
return [[week, articles]]
def preprocess_html(self, soup) :
# This method is called for every page, be it cartoon or TOC. We need to process each in its own way
cartoon = soup.find('div', attrs={'class' : 'cartoon'})
if cartoon :
# It is a cartoon. Extract the title.
title = ''
img = soup.find('img', attrs = {'alt' : True})
if img :
title = img['alt']
# Using the 'extra_css' displays it in the <body> and not in the <head>. See comment at the top of this class. Setting the style this way solves that.
tag = Tag(soup, 'div', [('style', 'text-align: center; margin-bottom: 8px')])
tag.insert(0, title)
cartoon.insert(0, tag)
# I have not quite worked out why, but we have to throw out this part of the page. It contains the very same index we processed earlier,
# and Calibre does not like that too much. As far as I can tell it goes into recursion and the result is an empty eBook.
select = cartoon.find('div', attrs={'class' : 'selectcartoon'})
if select :
select.extract()
return cartoon
else :
# It is a TOC. Just return the whole lot.
return soup
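For readers not familiar with calibre recipes, the value that parse_index() above assembles is a list of (feed title, article list) pairs, with each article a small dict. A minimal sketch of that shape (illustrative only, with hypothetical URLs; not part of the commit):

# Sketch of the structure parse_index() returns for this recipe.
week = u'week 47'                               # feed title, taken from the 'week' span
articles = [
    {'title': u'maandag',   'date': u'', 'url': u'http://foksuk.nl/...', 'description': ''},
    {'title': u'dinsdag 1', 'date': u'', 'url': u'http://foksuk.nl/...', 'description': ''},
    {'title': u'dinsdag 2', 'date': u'', 'url': u'http://foksuk.nl/...', 'description': ''},
]
index = [[week, articles]]                      # what parse_index() hands back to calibre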

View File

@ -43,97 +43,45 @@ class Guardian(BasicNewsRecipe):
#match-stats-summary{font-size:small; font-family:Arial,Helvetica,sans-serif;font-weight:normal;} #match-stats-summary{font-size:small; font-family:Arial,Helvetica,sans-serif;font-weight:normal;}
''' '''
def find_sections(self):
soup = self.index_to_soup('http://www.guardian.co.uk/theguardian')
# find cover pic
img = soup.find( 'img',attrs ={'alt':'Guardian digital edition'})
if img is not None:
self.cover_url = img['src']
# end find cover pic
idx = soup.find('div', id='book-index')
for s in idx.findAll('strong', attrs={'class':'book'}):
a = s.find('a', href=True)
yield (self.tag_to_string(a), a['href'])
def find_articles(self, url):
soup = self.index_to_soup(url)
div = soup.find('div', attrs={'class':'book-index'})
for ul in div.findAll('ul', attrs={'class':'trailblock'}):
for li in ul.findAll('li'):
a = li.find(href=True)
if not a:
continue
title = self.tag_to_string(a)
url = a['href']
if not title or not url:
continue
tt = li.find('div', attrs={'class':'trailtext'})
if tt is not None:
for da in tt.findAll('a'): da.extract()
desc = self.tag_to_string(tt).strip()
yield {
'title': title, 'url':url, 'description':desc,
'date' : strftime('%a, %d %b'),
}
def parse_index(self): def parse_index(self):
feeds = []
soup = self.index_to_soup('http://www.guardian.co.uk/theguardian') for title, href in self.find_sections():
# find cover pic feeds.append((title, list(self.find_articles(href))))
img = soup.find( 'img',attrs ={'alt':'Guardian digital edition'}) return feeds
if img is None: return None
else:
self.cover_url = img['src']
# end find cover pic
sections = []
ans = []
for li in soup.findAll( 'li'):
section = ''
articles = []
if li.a and li.a.has_key('href'):
url = li.a['href']
if 'mainsection' in url:
section = self.tag_to_string(url)
i = len(section)
index1 = section.rfind('/',0,i)
section = section[index1+1:i]
sections.append(section)
#find the articles in the Main Section start
soup = self.index_to_soup(url)
date = strftime('%a, %d %b')
descl = []
for desclist in soup.findAll(name='div',attrs={'class':"trailtext"}):
descl.append(self.tag_to_string(desclist).strip())
t = -1
for tag in soup.findAll('h3'):
t = t+1
for a in tag.findAll('a'):
if t < len(descl):
desc = descl[t]
else:
desc = ''
if a and a.has_key('href'):
url2 = a['href']
else:
url2 =''
title = self.tag_to_string(a)
if len(articles) == 0: #First article
articles.append({
'title':title,
'date':date,
'url':url2,
'description':desc,
})
else:
#eliminate duplicates start
if {'title':title,'date':date,'url':url2,'description':desc} in articles :
url2 = ''
#eliminate duplicates end
else:
if 'http://jobs.guardian.co.uk/' in url2:
url2 = ''
else:
articles.append({
'title':title,
'date':date,
'url':url2,
'description':desc,
})
#find the articles in the Main Section end
ans.append( articles)
else:
url =''
titles = map(self.find_title, sections)
ans1 = list(zip(titles,ans))
return ans1[2:]
def find_title(self, section):
d = {'topstories':'Top Stories', 'international':'International', 'editorialsandreply':'Editorials and Reply',
'commentanddebate':'Comment and Debate','uknews':'UK News','saturday':'Saturday','sunday':'Sunday',
'reviews':'Reviews', 'obituaries':'Obituaries'}
return d.get(section, section)
def preprocess_html(self, soup): def preprocess_html(self, soup):

View File

@ -0,0 +1,110 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
class HBR(BasicNewsRecipe):
title = 'Harvard Business Review'
description = 'To subscribe go to http://hbr.harvardbusiness.org'
needs_subscription = True
__author__ = 'Kovid Goyal and Sujata Raman'
timefmt = ' [%B %Y]'
language = 'en'
no_stylesheets = True
LOGIN_URL = 'http://hbr.harvardbusiness.org/login?request_url=/'
INDEX = 'http://hbr.harvardbusiness.org/current'
keep_only_tags = [dict(name='div', id='content')]
remove_tags = [
dict(id=['articleDate', 'subscriptionModule', 'errorArea',
'feedbackForm', 'relatedModule', 'articleSecondaryModule',
'contentRight', 'summaryLink']),
dict(name='form'),
]
extra_css = '''
a {font-family:Georgia,"Times New Roman",Times,serif; font-style:italic; color:#000000; }
.article{font-family:Georgia,"Times New Roman",Times,serif; font-size: xx-small;}
h2{font-family:Georgia,"Times New Roman",Times,serif; font-weight:bold; font-size:large; }
h4{font-family:Georgia,"Times New Roman",Times,serif; font-weight:bold; font-size:small; }
#articleAuthors{font-family:Georgia,"Times New Roman",Times,serif; font-style:italic; color:#000000;font-size:x-small;}
#summaryText{font-family:Georgia,"Times New Roman",Times,serif; font-weight:bold; font-size:x-small;}
'''
def get_browser(self):
br = BasicNewsRecipe.get_browser(self)
br.open(self.LOGIN_URL)
br.select_form(nr=0)
br['ssousername'] = self.username
br['password'] = self.password
raw = br.submit().read()
if 'My Account' not in raw:
raise Exception('Failed to login, are you sure your username and password are correct?')
self.logout_url = None
link = br.find_link(text='(sign out)')
if link:
self.logout_url = link.absolute_url
return br
def cleanup(self):
if self.logout_url is not None:
self.browser.open(self.logout_url)
def map_url(self, url):
if url.endswith('/ar/1'):
return url[:-1]+'pr'
def get_features(self, soup):
div = soup.find('div', id='issueFeatures')
for li in div.findAll('li'):
a = li.find('a', href=True)
url = 'http://hbr.harvardbusiness.org'+a['href']
url = self.map_url(url)
if not url:
continue
title = self.tag_to_string(a)
p = li.find('p')
desc = ''
if p is not None:
desc = self.tag_to_string(p)
yield {'title':title, 'url':url, 'description':desc}
def get_departments(self, soup):
div = soup.find('div', id='issueDepartmentsContent')
for h4 in div.findAll('h4'):
feed = self.tag_to_string(h4)
articles = []
ul = h4.findNextSibling('ul')
for li in ul.findAll('li'):
a = li.find('a', href=True)
url = 'http://hbr.harvardbusiness.org'+a['href']
url = self.map_url(url)
if not url:
continue
title = self.tag_to_string(a)
p = li.find('p')
desc = ''
if p is not None:
desc = self.tag_to_string(p)
articles.append({'title':title, 'url':url, 'description':desc})
yield [feed, articles]
def parse_index(self):
soup = self.index_to_soup(self.INDEX)
feeds = []
feeds.append(('Features', list(self.get_features(soup))))
feeds.extend(self.get_departments(soup))
return feeds
def get_cover_url(self):
cover_url = None
index = 'http://hbr.harvardbusiness.org/current'
soup = self.index_to_soup(index)
link_item = soup.find('img', alt=re.compile("HBR Cover Image"), src=True)
if link_item:
cover_url = 'http://hbr.harvardbusiness.org' + link_item['src']
return cover_url
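As a side note, map_url() above returns the print-friendly URL only for links ending in '/ar/1'; anything else falls through to None and is skipped by get_features() and get_departments(). A tiny illustration with a made-up article URL (not part of the commit):

def map_url(url):
    # Same logic as HBR.map_url: rewrite .../ar/1 to the print version .../ar/pr
    if url.endswith('/ar/1'):
        return url[:-1] + 'pr'

print(map_url('http://hbr.harvardbusiness.org/2009/11/example-article/ar/1'))
# -> http://hbr.harvardbusiness.org/2009/11/example-article/ar/pr
print(map_url('http://hbr.harvardbusiness.org/current'))
# -> None, so the calling code skips this link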

View File

@ -12,20 +12,29 @@ from calibre.web.feeds.news import BasicNewsRecipe
class KellogInsight(BasicNewsRecipe): class KellogInsight(BasicNewsRecipe):
title = 'Kellog Insight' title = 'Kellog Insight'
__author__ = 'Kovid Goyal' __author__ = 'Kovid Goyal and Sujata Raman'
description = 'Articles from the Kellog School of Management' description = 'Articles from the Kellog School of Management'
no_stylesheets = True no_stylesheets = True
encoding = 'utf-8' encoding = 'utf-8'
language = 'en' language = 'en'
oldest_article = 60 oldest_article = 60
remove_tags_before = {'name':'h1'}
remove_tags_after = {'class':'col-two-text'}
keep_only_tags = [dict(name='div', attrs={'id':['print_no_comments']})]
remove_tags = [dict(name='div', attrs={'class':'col-three'})]
feeds = [('Articles', extra_css = '''
'http://insight.kellogg.northwestern.edu/index.php/Kellogg/RSS')] h1{font-family:arial; font-size:medium; color:#333333;}
.col-one{font-family:arial; font-size:xx-small;}
.col-two{font-family:arial; font-size:x-small; }
h2{font-family:arial; font-size:small; color:#666666;}
h3{font-family:arial; font-size:small; color:#333333;text-transform: uppercase; font-weight:normal;}
h4{color:#660000;font-family:arial; font-size:x-small;}
.col-two-text{font-family:arial; font-size:x-small; color:#333333;}
'''
feeds = [('Articles', 'http://insight.kellogg.northwestern.edu/index.php/Kellogg/RSS')]
def get_article_url(self, article): def get_article_url(self, article):
# Get only article not blog links # Get only article not blog links
@ -34,3 +43,11 @@ class KellogInsight(BasicNewsRecipe):
return link return link
self.log('Skipping non-article', link) self.log('Skipping non-article', link)
return None return None
def preprocess_html(self, soup):
for tag in soup.findAll(name=['span']):
tag.nextSibling.name = 'h4'
return soup

View File

@ -14,7 +14,7 @@ class NewScientist(BasicNewsRecipe):
description = 'Science news and science articles from New Scientist.' description = 'Science news and science articles from New Scientist.'
language = 'en' language = 'en'
publisher = 'New Scientist' publisher = 'New Scientist'
category = 'science news, science articles, science jobs, drugs, cancer, depression, computer software, sex' category = 'science news, science articles, science jobs, drugs, cancer, depression, computer software'
delay = 3 delay = 3
oldest_article = 7 oldest_article = 7
max_articles_per_feed = 100 max_articles_per_feed = 100

View File

@ -3,50 +3,55 @@ __license__ = 'GPL v3'
''' '''
philly.com/inquirer/ philly.com/inquirer/
''' '''
import re
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
class Philly(BasicNewsRecipe): class Philly(BasicNewsRecipe):
title = 'Philadelphia Inquirer' title = 'Philadelphia Inquirer'
__author__ = 'RadikalDissent' __author__ = 'RadikalDissent and Sujata Raman'
language = 'en' language = 'en'
description = 'Daily news from the Philadelphia Inquirer' description = 'Daily news from the Philadelphia Inquirer'
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
oldest_article = 1 oldest_article = 1
max_articles_per_feed = 25 max_articles_per_feed = 25
extra_css = ''' extra_css = '''
.byline {font-size: small; color: grey; font-style:italic; } h1{font-family:verdana,arial,helvetica,sans-serif; font-size: large;}
.lastline {font-size: small; color: grey; font-style:italic;} h2{font-family:verdana,arial,helvetica,sans-serif; font-size: small;}
.contact {font-size: small; color: grey;} .body-content{font-family:verdana,arial,helvetica,sans-serif; font-size: small;}
.contact p {font-size: small; color: grey;} .byline {font-size: small; color: #666666; font-style:italic; }
.lastline {font-size: small; color: #666666; font-style:italic;}
.contact {font-size: small; color: #666666;}
.contact p {font-size: small; color: #666666;}
#photoCaption { font-family:verdana,arial,helvetica,sans-serif; font-size:x-small;}
.photoCaption { font-family:verdana,arial,helvetica,sans-serif; font-size:x-small;}
#photoCredit{ font-family:verdana,arial,helvetica,sans-serif; font-size:x-small; color:#666666;}
.photoCredit{ font-family:verdana,arial,helvetica,sans-serif; font-size:x-small; color:#666666;}
.article_timestamp{font-size:x-small; color:#666666;}
a {font-family:verdana,arial,helvetica,sans-serif; font-size: x-small;}
''' '''
preprocess_regexps = [(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
[
(r'<body.*<h1>', lambda match: '<body><h1>'),
(r'<font size="2" face="Arial">', lambda match: '<div class="contact"><font class="contact">'),
(r'<font face="Arial" size="2">', lambda match: '<div class="contact"><font class="contact">')
]
]
keep_only_tags = [ keep_only_tags = [
dict(name='h1'), dict(name='div', attrs={'class':'story-content'}),
dict(name='p', attrs={'class':['byline','lastline']}), dict(name='div', attrs={'id': 'contentinside'})
dict(name='div', attrs={'class':'body-content'}), ]
]
remove_tags = [ remove_tags = [
dict(name='hr'), dict(name='div', attrs={'class':['linkssubhead','post_balloon','relatedlist','pollquestion','b_sq']}),
dict(name='p', attrs={'class':'buzzBadge'}), dict(name='dl', attrs={'class':'relatedlist'}),
dict(name='div', attrs={'id':['photoNav','sidebar_adholder']}),
dict(name='a', attrs={'class': ['headlineonly','bl']}),
dict(name='img', attrs={'class':'img_noborder'})
] ]
def print_version(self, url): # def print_version(self, url):
return url + '?viewAll=y' # return url + '?viewAll=y'
feeds = [ feeds = [
('Front Page', 'http://www.philly.com/inquirer_front_page.rss'), ('Front Page', 'http://www.philly.com/inquirer_front_page.rss'),
('Business', 'http://www.philly.com/inq_business.rss'), ('Business', 'http://www.philly.com/inq_business.rss'),
('News', 'http://www.philly.com/inquirer/news/index.rss'), #('News', 'http://www.philly.com/inquirer/news/index.rss'),
('Nation', 'http://www.philly.com/inq_news_world_us.rss'), ('Nation', 'http://www.philly.com/inq_news_world_us.rss'),
('Local', 'http://www.philly.com/inquirer_local.rss'), ('Local', 'http://www.philly.com/inquirer_local.rss'),
('Health', 'http://www.philly.com/inquirer_health_science.rss'), ('Health', 'http://www.philly.com/inquirer_health_science.rss'),
@ -54,3 +59,27 @@ class Philly(BasicNewsRecipe):
('Editorial and opinion', 'http://www.philly.com/inq_news_editorial.rss'), ('Editorial and opinion', 'http://www.philly.com/inq_news_editorial.rss'),
('Sports', 'http://www.philly.com/inquirer_sports.rss') ('Sports', 'http://www.philly.com/inquirer_sports.rss')
] ]
def get_article_url(self, article):
ans = article.link
try:
self.log('Looking for full story link in', ans)
soup = self.index_to_soup(ans)
x = soup.find(text="View All")
if x is not None:
ans = ans + '?viewAll=y'
self.log('Found full story link', ans)
except:
pass
return ans
def postprocess_html(self, soup,first):
for tag in soup.findAll(name='div',attrs={'class':"container_ate_qandatitle"}):
tag.extract()
for tag in soup.findAll(name='br'):
tag.extract()
return soup

View File

@ -9,7 +9,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class Sciencenews(BasicNewsRecipe): class Sciencenews(BasicNewsRecipe):
title = u'ScienceNews' title = u'ScienceNews'
__author__ = u'Darko Miletic' __author__ = u'Darko Miletic and Sujata Raman'
description = u"Science News is an award-winning weekly newsmagazine covering the most important research in all fields of science. Its 16 pages each week are packed with short, accurate articles that appeal to both general readers and scientists. Published since 1922, the magazine now reaches about 150,000 subscribers and more than 1 million readers. These are the latest News Items from Science News." description = u"Science News is an award-winning weekly newsmagazine covering the most important research in all fields of science. Its 16 pages each week are packed with short, accurate articles that appeal to both general readers and scientists. Published since 1922, the magazine now reaches about 150,000 subscribers and more than 1 million readers. These are the latest News Items from Science News."
oldest_article = 30 oldest_article = 30
language = 'en' language = 'en'
@ -17,13 +17,45 @@ class Sciencenews(BasicNewsRecipe):
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
timefmt = ' [%A, %d %B, %Y]' timefmt = ' [%A, %d %B, %Y]'
extra_css = '''
.content_description{font-family:georgia ;font-size:x-large; color:#646464 ; font-weight:bold;}
.content_summary{font-family:georgia ;font-size:small ;color:#585858 ; font-weight:bold;}
.content_authors{font-family:helvetica,arial ;font-size: xx-small ;color:#14487E ;}
.content_edition{font-family:helvetica,arial ;font-size: xx-small ;}
.exclusive{color:#FF0000 ;}
.anonymous{color:#14487E ;}
.content_content{font-family:helvetica,arial ;font-size: x-small ; color:#000000;}
.description{color:#585858;font-family:helvetica,arial ;font-size: xx-small ;}
.credit{color:#A6A6A6;font-family:helvetica,arial ;font-size: xx-small ;}
'''
keep_only_tags = [ dict(name='div', attrs={'id':'column_action'}) ] keep_only_tags = [ dict(name='div', attrs={'id':'column_action'}) ]
remove_tags_after = dict(name='ul', attrs={'id':'content_functions_bottom'}) remove_tags_after = dict(name='ul', attrs={'id':'content_functions_bottom'})
remove_tags = [ remove_tags = [
dict(name='ul', attrs={'id':'content_functions_bottom'}) dict(name='ul', attrs={'id':'content_functions_bottom'})
,dict(name='div', attrs={'id':'content_functions_top'}) ,dict(name='div', attrs={'id':['content_functions_top','breadcrumb_content']})
,dict(name='img', attrs={'class':'icon'})
,dict(name='div', attrs={'class': 'embiggen'})
] ]
feeds = [(u"Science News / News Items", u'http://sciencenews.org/view/feed/type/news/name/news.rss')] feeds = [(u"Science News / News Items", u'http://sciencenews.org/view/feed/type/news/name/news.rss')]
def get_cover_url(self):
cover_url = None
index = 'http://www.sciencenews.org/view/home'
soup = self.index_to_soup(index)
link_item = soup.find(name = 'img',alt = "issue")
print link_item
if link_item:
cover_url = 'http://www.sciencenews.org' + link_item['src'] + '.jpg'
return cover_url
def preprocess_html(self, soup):
for tag in soup.findAll(name=['span']):
tag.name = 'div'
return soup

View File

@ -6,51 +6,86 @@ __docformat__ = 'restructuredtext en'
''' '''
smh.com.au smh.com.au
''' '''
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
class SMH(BasicNewsRecipe): class SMH(BasicNewsRecipe):
title = 'Sydney Morning Herald' title = 'Sydney Morning Herald'
description = 'Business News, World News and Breaking News in Australia' description = 'Business News, World News and Breaking News in Australia'
__author__ = 'Kovid Goyal' __author__ = 'Kovid Goyal and Sujata Raman'
language = 'en_AU' language = 'en_AU'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
no_javascript = True
timefmt = ' [%A, %d %B, %Y]'
encoding = 'utf-8'
keep_only_tags = [dict(name='div', attrs ={'id':'content'})]
remove_tags = [
dict(name='div', attrs={'align' :'right'}),
dict(name='p', attrs={'class' :'comments'}),
dict(name='a', attrs={'class' :['more-photos','performerpromo']}),
dict(name='img', attrs={'alt' :'aap'}),
dict(name='div', attrs ={'id':['googleAds','moreGoogleAds','comments','footer','sidebar','austereopuff','adSpotIsland']}),
dict(name='div', attrs ={'class':['article-links','wof','articleTools top','cN-multimediaGroup cfix','articleTools bottom']}),
dict(name='div', attrs ={'class':['clear','adSpot-textboxgr1','adSpot-textBox','articleTools-c3 cfix','articleExtras-bottom','span-16 last']}),
dict(name='div', attrs ={'class':[ 'sidebar span-5','cT-socialCommenting','cN-linkList','cN-topicSelector','cT-storyTools cfix','cT-imageMultimedia']}) ,
dict(name='iframe'),
]
extra_css = '''
h1{font-family:Georgia,"Times New Roman",Times,serif; font-weight:bold; font-size:large;}
.cT-storyDetails{font-family:Arial,Helvetica,sans-serif; color:#666666;font-size:x-small;}
.articleBody{font-family:Arial,Helvetica,sans-serif; color:black;font-size:small;}
.cT-imageLandscape{font-family:Arial,Helvetica,sans-serif; color:#333333 ;font-size:x-small;}
.source{font-family:Arial,Helvetica,sans-serif; color:#333333 ;font-size:xx-small;}
#content{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
.pageprint{font-family:Arial,Helvetica,sans-serif;font-size:small;}
#bylineDetails{font-family:Arial,Helvetica,sans-serif; color:#666666;font-size:x-small;}
.featurePic-wide{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
#idfeaturepic{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
h3{font-family:Georgia,"Times New Roman",Times,serif; font-size:small;}
h2{font-family:Georgia,"Times New Roman",Times,serif; font-size:small;}
h4{font-family:Georgia,"Times New Roman",Times,serif; font-size:small;}
h5{font-family:Georgia,"Times New Roman",Times,serif; font-size:small;}
body{font-family:Arial,Helvetica,sans-serif; font-size:x-small;}
'''
feeds = [
('Top Stories', 'http://feeds.smh.com.au/rssheadlines/top.xml'),
('National', 'http://feeds.smh.com.au/rssheadlines/national.xml'),
('World', 'http://feeds.smh.com.au/rssheadlines/world.xml'),
('Business', 'http://www.smh.com.au/rssheadlines/business.xml'),
('National Times', 'http://www.smh.com.au/rssheadlines/opinion/article/rss.xml'),
('Entertainment', 'http://feeds.smh.com.au/rssheadlines/entertainment.xml'),
('Technology', 'http://feeds.smh.com.au/rssheadlines/technology.xml'),
('Sport', 'http://feeds.smh.com.au/rssheadlines/sport.xml'),
]
def preprocess_html(self, soup):
bod = soup.find('bod')
if bod is not None:
bod.tag = 'div'
p = soup.find(id='content')
bod.extract()
p.insert(len(p), bod)
return soup
def get_browser(self): def get_browser(self):
br = BasicNewsRecipe.get_browser() br = BasicNewsRecipe.get_browser()
br.set_handle_refresh(False) br.set_handle_refresh(False)
return br return br
def parse_index(self): def get_article_url(self, article):
url = article.link
soup = BeautifulSoup(self.browser.open('http://www.smh.com.au/text/').read()) if 'media' in url:
url = ''
feeds, articles = [], [] return url
feed = None
for tag in soup.findAll(['h3', 'a']):
if tag.name == 'h3':
if articles:
feeds.append((feed, articles))
articles = []
feed = self.tag_to_string(tag)
elif feed is not None and tag.has_key('href') and tag['href'].strip():
url = tag['href'].strip()
if url.startswith('/'):
url = 'http://www.smh.com.au' + url
title = self.tag_to_string(tag)
articles.append({
'title': title,
'url' : url,
'date' : strftime('%a, %d %b'),
'description' : '',
'content' : '',
})
return feeds

View File

@ -2,7 +2,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
__appname__ = 'calibre' __appname__ = 'calibre'
__version__ = '0.6.23' __version__ = '0.6.24'
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>" __author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
import re import re

View File

@ -101,8 +101,6 @@ def metadata_sources(metadata_type='basic', customize=True, isbndb_key=None):
plugin.site_customization = customization.get(plugin.name, None) plugin.site_customization = customization.get(plugin.name, None)
if plugin.name == 'IsbnDB' and isbndb_key is not None: if plugin.name == 'IsbnDB' and isbndb_key is not None:
plugin.site_customization = isbndb_key plugin.site_customization = isbndb_key
if not plugin.is_ok():
continue
yield plugin yield plugin
def get_isbndb_key(): def get_isbndb_key():

View File

@ -92,3 +92,8 @@ class POCKETBOOK360(EB600):
VENDOR_NAME = 'PHILIPS' VENDOR_NAME = 'PHILIPS'
WINDOWS_MAIN_MEM = 'MASS_STORGE' WINDOWS_MAIN_MEM = 'MASS_STORGE'
OSX_MAIN_MEM = 'Philips Mass Storge Media'
OSX_CARD_A_MEM = 'Philips Mass Storge Media'

View File

@ -66,14 +66,24 @@ class USBMS(CLI, Device):
match = fnmatch.filter(files, '*.%s' % (book_type)) match = fnmatch.filter(files, '*.%s' % (book_type))
for i, filename in enumerate(match): for i, filename in enumerate(match):
self.report_progress((i+1) / float(len(match)), _('Getting list of books on device...')) self.report_progress((i+1) / float(len(match)), _('Getting list of books on device...'))
bl.append(self.__class__.book_from_path(os.path.join(path, filename))) try:
bl.append(self.__class__.book_from_path(os.path.join(path, filename)))
except: # Probably a filename encoding error
import traceback
traceback.print_exc()
continue
else: else:
path = os.path.join(prefix, ebook_dir) path = os.path.join(prefix, ebook_dir)
paths = os.listdir(path) paths = os.listdir(path)
for i, filename in enumerate(paths): for i, filename in enumerate(paths):
self.report_progress((i+1) / float(len(paths)), _('Getting list of books on device...')) self.report_progress((i+1) / float(len(paths)), _('Getting list of books on device...'))
if path_to_ext(filename) in self.FORMATS: if path_to_ext(filename) in self.FORMATS:
bl.append(self.__class__.book_from_path(os.path.join(path, filename))) try:
bl.append(self.__class__.book_from_path(os.path.join(path, filename)))
except: # Probably a file name encoding error
import traceback
traceback.print_exc()
continue
self.report_progress(1.0, _('Getting list of books on device...')) self.report_progress(1.0, _('Getting list of books on device...'))

View File

@ -9,9 +9,11 @@ from threading import Thread
from calibre import prints from calibre import prints
from calibre.utils.config import OptionParser from calibre.utils.config import OptionParser
from calibre.utils.logging import default_log from calibre.utils.logging import default_log
from calibre.ebooks.metadata import MetaInformation
from calibre.customize import Plugin from calibre.customize import Plugin
metadata_config = None
class MetadataSource(Plugin): class MetadataSource(Plugin):
author = 'Kovid Goyal' author = 'Kovid Goyal'
@ -23,11 +25,17 @@ class MetadataSource(Plugin):
#: tags/rating/reviews/etc. #: tags/rating/reviews/etc.
metadata_type = 'basic' metadata_type = 'basic'
#: If not None, the customization dialog will allow for string
#: based customization as well the default customization. The
#: string customization will be saved in the site_customization
#: member.
string_customization_help = None
type = _('Metadata download') type = _('Metadata download')
def __call__(self, title, author, publisher, isbn, verbose, log=None, def __call__(self, title, author, publisher, isbn, verbose, log=None,
extra=None): extra=None):
self.worker = Thread(target=self.fetch) self.worker = Thread(target=self._fetch)
self.worker.daemon = True self.worker.daemon = True
self.title = title self.title = title
self.verbose = verbose self.verbose = verbose
@ -39,23 +47,87 @@ class MetadataSource(Plugin):
self.exception, self.tb, self.results = None, None, [] self.exception, self.tb, self.results = None, None, []
self.worker.start() self.worker.start()
def _fetch(self):
try:
self.fetch()
if self.results:
c = self.config_store().get(self.name, {})
res = self.results
if isinstance(res, MetaInformation):
res = [res]
for mi in res:
if not c.get('rating', True):
mi.rating = None
if not c.get('comments', True):
mi.comments = None
if not c.get('tags', True):
mi.tags = []
except Exception, e:
self.exception = e
self.tb = traceback.format_exc()
def fetch(self): def fetch(self):
''' '''
All the actual work is done here. All the actual work is done here.
''' '''
raise NotImplementedError raise NotImplementedError
def is_ok(self):
'''
Used to check if the plugin has been correctly customized.
For example: The isbndb plugin checks to see if the site_customization
has been set with an isbndb.com access key.
'''
return True
def join(self): def join(self):
return self.worker.join() return self.worker.join()
def is_customizable(self):
return True
def config_store(self):
global metadata_config
if metadata_config is None:
from calibre.utils.config import XMLConfig
metadata_config = XMLConfig('plugins/metadata_download')
return metadata_config
def config_widget(self):
from PyQt4.Qt import QWidget, QVBoxLayout, QLabel, Qt, QLineEdit, \
QCheckBox
from calibre.customize.ui import config
w = QWidget()
w._layout = QVBoxLayout(w)
w.setLayout(w._layout)
if self.string_customization_help is not None:
w._sc_label = QLabel(self.string_customization_help, w)
w._layout.addWidget(w._sc_label)
customization = config['plugin_customization']
def_sc = customization.get(self.name, '')
if not def_sc:
def_sc = ''
w._sc = QLineEdit(def_sc, w)
w._layout.addWidget(w._sc)
w._sc_label.setWordWrap(True)
w._sc_label.setTextInteractionFlags(Qt.LinksAccessibleByMouse
| Qt.LinksAccessibleByKeyboard)
w._sc_label.setOpenExternalLinks(True)
c = self.config_store()
c = c.get(self.name, {})
for x, l in {'rating':_('ratings'), 'tags':_('tags'),
'comments':_('description/reviews')}.items():
cb = QCheckBox(_('Download %s from %s')%(l,
self.name))
setattr(w, '_'+x, cb)
cb.setChecked(c.get(x, True))
w._layout.addWidget(cb)
return w
def save_settings(self, w):
dl_settings = {}
for x in ('rating', 'tags', 'comments'):
dl_settings[x] = getattr(w, '_'+x).isChecked()
c = self.config_store()
c.set(self.name, dl_settings)
if hasattr(w, '_sc'):
sc = unicode(w._sc.text()).strip()
from calibre.customize.ui import customize_plugin
customize_plugin(self, sc)
class GoogleBooks(MetadataSource): class GoogleBooks(MetadataSource):
@ -102,14 +174,11 @@ class ISBNDB(MetadataSource):
self.exception = e self.exception = e
self.tb = traceback.format_exc() self.tb = traceback.format_exc()
def customization_help(self, gui=False): @property
def string_customization_help(self):
ans = _('To use isbndb.com you must sign up for a %sfree account%s ' ans = _('To use isbndb.com you must sign up for a %sfree account%s '
'and enter your access key below.') 'and enter your access key below.')
if gui: return '<p>'+ans%('<a href="http://www.isbndb.com">', '</a>')
ans = '<p>'+ans%('<a href="http://www.isbndb.com">', '</a>')
else:
ans = ans.replace('%s', '')
return ans
class Amazon(MetadataSource): class Amazon(MetadataSource):
@ -191,7 +260,7 @@ def get_social_metadata(mi, verbose=0):
comments.add(dmi.comments) comments.add(dmi.comments)
if ratings: if ratings:
rating = sum(ratings)/float(len(ratings)) rating = sum(ratings)/float(len(ratings))
if mi.rating is None: if mi.rating is None or mi.rating < 0.1:
mi.rating = rating mi.rating = rating
else: else:
mi.rating = (mi.rating + rating)/2.0 mi.rating = (mi.rating + rating)/2.0
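To see how the new hooks fit together, here is a minimal, hypothetical MetadataSource subclass (not part of the commit; the import path is assumed to match the file being patched). fetch() fills self.results, and the new _fetch() wrapper then strips ratings, comments and tags according to the per-plugin settings saved by config_widget()/save_settings():

# Hypothetical example plugin; assumes these classes live in calibre.ebooks.metadata.fetch
from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata.fetch import MetadataSource

class ExampleSource(MetadataSource):
    name = 'Example Source'           # used as the key into the plugin config store
    metadata_type = 'basic'
    string_customization_help = None  # no string customization in this sketch

    def fetch(self):
        # All the real work happens here; _fetch() applies the
        # rating/comments/tags download preferences afterwards.
        mi = MetaInformation(self.title, ['Unknown'])
        mi.tags = ['example']
        self.results = [mi]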

View File

@ -3,6 +3,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''Read meta information from PDF files''' '''Read meta information from PDF files'''
import re
from functools import partial from functools import partial
from calibre import prints from calibre import prints
@ -11,10 +12,16 @@ from calibre.ebooks.metadata import MetaInformation, string_to_authors, authors_
pdfreflow, pdfreflow_error = plugins['pdfreflow'] pdfreflow, pdfreflow_error = plugins['pdfreflow']
_isbn_pat = re.compile(r'ISBN[: ]*([-0-9Xx]+)')
def get_metadata(stream, cover=True): def get_metadata(stream, cover=True):
if pdfreflow is None: if pdfreflow is None:
raise RuntimeError(pdfreflow_error) raise RuntimeError(pdfreflow_error)
info = pdfreflow.get_metadata(stream.read(), cover) raw = stream.read()
isbn = _isbn_pat.search(raw)
if isbn is not None:
isbn = isbn.group(1).replace('-', '').replace(' ', '')
info = pdfreflow.get_metadata(raw, cover)
title = info.get('Title', None) title = info.get('Title', None)
au = info.get('Author', None) au = info.get('Author', None)
if au is None: if au is None:
@ -22,6 +29,8 @@ def get_metadata(stream, cover=True):
else: else:
au = string_to_authors(au) au = string_to_authors(au)
mi = MetaInformation(title, au) mi = MetaInformation(title, au)
if isbn is not None:
mi.isbn = isbn
creator = info.get('Creator', None) creator = info.get('Creator', None)
if creator: if creator:
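A quick illustration of what the new _isbn_pat extraction does (illustrative only; the sample text is made up and uses the well-known example ISBN 0-306-40615-2):

import re

_isbn_pat = re.compile(r'ISBN[: ]*([-0-9Xx]+)')

raw = 'First edition, 2009.  ISBN: 0-306-40615-2  Printed on acid-free paper.'
m = _isbn_pat.search(raw)
if m is not None:
    isbn = m.group(1).replace('-', '').replace(' ', '')
    print(isbn)   # -> 0306406152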

View File

@ -777,7 +777,7 @@ class Manifest(object):
# Remove DOCTYPE declaration as it messes up parsing # Remove DOCTYPE declaration as it messes up parsing
# Inparticular it causes tostring to insert xmlns # In particular, it causes tostring to insert xmlns
# declarations, which messes up the coercing logic # declarations, which messes up the coercing logic
idx = data.find('<html') idx = data.find('<html')
if idx > -1: if idx > -1:
@ -1746,9 +1746,20 @@ class OEBBook(object):
return d.replace('\r\n', '\n').replace('\r', '\n') return d.replace('\r\n', '\n').replace('\r', '\n')
if isinstance(data, unicode): if isinstance(data, unicode):
return fix_data(data) return fix_data(data)
if data[:2] in ('\xff\xfe', '\xfe\xff'): bom_enc = None
if data[:4] in ('\0\0\xfe\xff', '\xff\xfe\0\0'):
bom_enc = {'\0\0\xfe\xff':'utf-32-be',
'\xff\xfe\0\0':'utf-32-le'}[data[:4]]
data = data[4:]
elif data[:2] in ('\xff\xfe', '\xfe\xff'):
bom_enc = {'\xff\xfe':'utf-16-le', '\xfe\xff':'utf-16-be'}[data[:2]]
data = data[2:]
elif data[:3] == '\xef\xbb\xbf':
bom_enc = 'utf-8'
data = data[3:]
if bom_enc is not None:
try: try:
return fix_data(data.decode('utf-16')) return fix_data(data.decode(bom_enc))
except UnicodeDecodeError: except UnicodeDecodeError:
pass pass
if self.input_encoding is not None: if self.input_encoding is not None:
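For reference, a small standalone sketch (not part of the commit) of the BOM sniffing the hunk above introduces, fed a hand-built UTF-16-LE byte string:

# UTF-16-LE BOM followed by the two characters 'Hi'.
data = b'\xff\xfe' + u'Hi'.encode('utf-16-le')

bom_enc = None
if data[:4] in (b'\x00\x00\xfe\xff', b'\xff\xfe\x00\x00'):
    bom_enc = {b'\x00\x00\xfe\xff': 'utf-32-be', b'\xff\xfe\x00\x00': 'utf-32-le'}[data[:4]]
    data = data[4:]
elif data[:2] in (b'\xff\xfe', b'\xfe\xff'):
    bom_enc = {b'\xff\xfe': 'utf-16-le', b'\xfe\xff': 'utf-16-be'}[data[:2]]
    data = data[2:]
elif data[:3] == b'\xef\xbb\xbf':
    bom_enc = 'utf-8'
    data = data[3:]

print(bom_enc)               # -> utf-16-le
print(data.decode(bom_enc))  # -> Hi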

View File

@ -31,6 +31,13 @@ class BulkConfig(Config):
self.input_label.hide() self.input_label.hide()
self.input_formats.hide() self.input_formats.hide()
self.opt_individual_saved_settings.setVisible(True)
self.opt_individual_saved_settings.setChecked(True)
self.opt_individual_saved_settings.setToolTip(_('For '
'settings that cannot be specified in this dialog, use the '
'values saved in a previous conversion (if they exist) instead '
'of using the defaults specified in the Preferences'))
self.connect(self.output_formats, SIGNAL('currentIndexChanged(QString)'), self.connect(self.output_formats, SIGNAL('currentIndexChanged(QString)'),
self.setup_pipeline) self.setup_pipeline)

View File

@ -116,6 +116,7 @@ class Config(ResizableDialog, Ui_Dialog):
def __init__(self, parent, db, book_id, def __init__(self, parent, db, book_id,
preferred_input_format=None, preferred_output_format=None): preferred_input_format=None, preferred_output_format=None):
ResizableDialog.__init__(self, parent) ResizableDialog.__init__(self, parent)
self.opt_individual_saved_settings.setVisible(False)
self.db, self.book_id = db, book_id self.db, self.book_id = db, book_id
self.setup_input_output_formats(self.db, self.book_id, preferred_input_format, self.setup_input_output_formats(self.db, self.book_id, preferred_input_format,

View File

@ -33,6 +33,13 @@
<item> <item>
<widget class="QComboBox" name="input_formats"/> <widget class="QComboBox" name="input_formats"/>
</item> </item>
<item>
<widget class="QCheckBox" name="opt_individual_saved_settings">
<property name="text">
<string>Use &amp;saved conversion settings for individual books</string>
</property>
</widget>
</item>
<item> <item>
<spacer name="horizontalSpacer"> <spacer name="horizontalSpacer">
<property name="orientation"> <property name="orientation">
@ -109,7 +116,7 @@
<x>0</x> <x>0</x>
<y>0</y> <y>0</y>
<width>810</width> <width>810</width>
<height>492</height> <height>489</height>
</rect> </rect>
</property> </property>
<layout class="QVBoxLayout" name="verticalLayout_3"> <layout class="QVBoxLayout" name="verticalLayout_3">

View File

@ -6,7 +6,6 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
from PyQt4.QtCore import SIGNAL, QObject from PyQt4.QtCore import SIGNAL, QObject
from PyQt4.QtGui import QDialog from PyQt4.QtGui import QDialog
from calibre.gui2 import qstring_to_unicode
from calibre.gui2.dialogs.metadata_bulk_ui import Ui_MetadataBulkDialog from calibre.gui2.dialogs.metadata_bulk_ui import Ui_MetadataBulkDialog
from calibre.gui2.dialogs.tag_editor import TagEditor from calibre.gui2.dialogs.tag_editor import TagEditor
from calibre.ebooks.metadata import string_to_authors, authors_to_sort_string, \ from calibre.ebooks.metadata import string_to_authors, authors_to_sort_string, \
@ -86,7 +85,7 @@ class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog):
def sync(self): def sync(self):
for id in self.ids: for id in self.ids:
au = qstring_to_unicode(self.authors.text()) au = unicode(self.authors.text())
if au: if au:
au = string_to_authors(au) au = string_to_authors(au)
self.db.set_authors(id, au, notify=False) self.db.set_authors(id, au, notify=False)
@ -97,28 +96,39 @@ class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog):
x = authors_to_sort_string(aut) x = authors_to_sort_string(aut)
if x: if x:
self.db.set_author_sort(id, x, notify=False) self.db.set_author_sort(id, x, notify=False)
aus = qstring_to_unicode(self.author_sort.text()) aus = unicode(self.author_sort.text())
if aus and self.author_sort.isEnabled(): if aus and self.author_sort.isEnabled():
self.db.set_author_sort(id, aus, notify=False) self.db.set_author_sort(id, aus, notify=False)
if self.write_rating: if self.write_rating:
self.db.set_rating(id, 2*self.rating.value(), notify=False) self.db.set_rating(id, 2*self.rating.value(), notify=False)
pub = qstring_to_unicode(self.publisher.text()) pub = unicode(self.publisher.text())
if pub: if pub:
self.db.set_publisher(id, pub, notify=False) self.db.set_publisher(id, pub, notify=False)
remove_tags = qstring_to_unicode(self.remove_tags.text()).strip() remove_tags = unicode(self.remove_tags.text()).strip()
if remove_tags: if remove_tags:
remove_tags = [i.strip() for i in remove_tags.split(',')] remove_tags = [i.strip() for i in remove_tags.split(',')]
self.db.unapply_tags(id, remove_tags, notify=False) self.db.unapply_tags(id, remove_tags, notify=False)
tags = qstring_to_unicode(self.tags.text()).strip() tags = unicode(self.tags.text()).strip()
if tags: if tags:
tags = map(lambda x: x.strip(), tags.split(',')) tags = map(lambda x: x.strip(), tags.split(','))
self.db.set_tags(id, tags, append=True, notify=False) self.db.set_tags(id, tags, append=True, notify=False)
if self.write_series: if self.write_series:
self.db.set_series(id, qstring_to_unicode(self.series.currentText()), notify=False) self.db.set_series(id, unicode(self.series.currentText()), notify=False)
if self.remove_format.currentIndex() > -1: if self.remove_format.currentIndex() > -1:
self.db.remove_format(id, unicode(self.remove_format.currentText()), index_is_id=True, notify=False) self.db.remove_format(id, unicode(self.remove_format.currentText()), index_is_id=True, notify=False)
if self.swap_title_and_author.isChecked():
title = self.db.title(id, index_is_id=True)
aum = self.db.authors(id, index_is_id=True)
if aum:
aum = [a.strip().replace('|', ',') for a in aum.split(',')]
new_title = authors_to_string(aum)
self.db.set_title(id, new_title)
if title:
new_authors = string_to_authors(title)
self.db.set_authors(id, new_authors)
self.changed = True self.changed = True
def series_changed(self): def series_changed(self):

View File

@ -7,7 +7,7 @@
<x>0</x> <x>0</x>
<y>0</y> <y>0</y>
<width>495</width> <width>495</width>
<height>387</height> <height>456</height>
</rect> </rect>
</property> </property>
<property name="windowTitle"> <property name="windowTitle">
@ -230,6 +230,13 @@
</property> </property>
</widget> </widget>
</item> </item>
<item row="9" column="0" colspan="2">
<widget class="QCheckBox" name="swap_title_and_author">
<property name="text">
<string>&amp;Swap title and author</string>
</property>
</widget>
</item>
</layout> </layout>
</widget> </widget>
</item> </item>

View File

@ -552,6 +552,8 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
warning_dialog(self, _('There were errors'), warning_dialog(self, _('There were errors'),
_('There were errors downloading social metadata'), _('There were errors downloading social metadata'),
det_msg=det, show=True) det_msg=det, show=True)
else:
book.tags = []
self.title.setText(book.title) self.title.setText(book.title)
self.authors.setText(authors_to_string(book.authors)) self.authors.setText(authors_to_string(book.authors))
if book.author_sort: self.author_sort.setText(book.author_sort) if book.author_sort: self.author_sort.setText(book.author_sort)

View File

@ -68,6 +68,7 @@ class LibraryDelegate(QItemDelegate):
self.drawFocus(painter, option, option.rect) self.drawFocus(painter, option, option.rect)
try: try:
painter.setRenderHint(QPainter.Antialiasing) painter.setRenderHint(QPainter.Antialiasing)
painter.setClipRect(option.rect)
y = option.rect.center().y()-self.SIZE/2. y = option.rect.center().y()-self.SIZE/2.
x = option.rect.right() - self.SIZE x = option.rect.right() - self.SIZE
painter.setPen(self.PEN) painter.setPen(self.PEN)

View File

@ -213,19 +213,18 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
self.device_manager.umount_device) self.device_manager.umount_device)
####################### Vanity ######################## ####################### Vanity ########################
self.vanity_template = _('<p>For help visit <a href="http://%s.' self.vanity_template = _('<p>For help see the: <a href="%s">User Manual</a>'
'kovidgoyal.net/user_manual">%s.kovidgoyal.net</a>' '<br>')%'http://calibre.kovidgoyal.net/user_manual'
'<br>')%(__appname__, __appname__)
self.vanity_template += _('<b>%s</b>: %s by <b>Kovid Goyal ' self.vanity_template += _('<b>%s</b>: %s by <b>Kovid Goyal '
'%%(version)s</b><br>%%(device)s</p>')%(__appname__, __version__) '%%(version)s</b><br>%%(device)s</p>')%(__appname__, __version__)
self.latest_version = ' ' self.latest_version = ' '
self.vanity.setText(self.vanity_template%dict(version=' ', device=' ')) self.vanity.setText(self.vanity_template%dict(version=' ', device=' '))
self.device_info = ' ' self.device_info = ' '
if not opts.no_update_check: if not opts.no_update_check:
self.update_checker = CheckForUpdates() self.update_checker = CheckForUpdates(self)
QObject.connect(self.update_checker, QObject.connect(self.update_checker,
SIGNAL('update_found(PyQt_PyObject)'), self.update_found) SIGNAL('update_found(PyQt_PyObject)'), self.update_found)
self.update_checker.start() self.update_checker.start(2000)
####################### Status Bar ##################### ####################### Status Bar #####################
self.status_bar = StatusBar(self.jobs_dialog, self.system_tray_icon) self.status_bar = StatusBar(self.jobs_dialog, self.system_tray_icon)
self.setStatusBar(self.status_bar) self.setStatusBar(self.status_bar)
@ -246,6 +245,7 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
md.addAction(_('Download metadata and covers')) md.addAction(_('Download metadata and covers'))
md.addAction(_('Download only metadata')) md.addAction(_('Download only metadata'))
md.addAction(_('Download only covers')) md.addAction(_('Download only covers'))
md.addAction(_('Download only social metadata'))
self.metadata_menu = md self.metadata_menu = md
self.add_menu = QMenu() self.add_menu = QMenu()
self.add_menu.addAction(_('Add books from a single directory')) self.add_menu.addAction(_('Add books from a single directory'))
@ -288,7 +288,10 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
set_metadata=False) set_metadata=False)
QObject.connect(md.actions()[6], SIGNAL('triggered(bool)'), QObject.connect(md.actions()[6], SIGNAL('triggered(bool)'),
self.__em5__) self.__em5__)
self.__em6__ = partial(self.download_metadata, covers=False,
set_metadata=False, set_social_metadata=True)
QObject.connect(md.actions()[7], SIGNAL('triggered(bool)'),
self.__em6__)
self.save_menu = QMenu() self.save_menu = QMenu()
@ -1027,7 +1030,8 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
############################### Edit metadata ############################## ############################### Edit metadata ##############################
def download_metadata(self, checked, covers=True, set_metadata=True): def download_metadata(self, checked, covers=True, set_metadata=True,
set_social_metadata=None):
rows = self.library_view.selectionModel().selectedRows() rows = self.library_view.selectionModel().selectedRows()
previous = self.library_view.currentIndex() previous = self.library_view.currentIndex()
if not rows or len(rows) == 0: if not rows or len(rows) == 0:
@ -1037,12 +1041,19 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
return return
db = self.library_view.model().db db = self.library_view.model().db
ids = [db.id(row.row()) for row in rows] ids = [db.id(row.row()) for row in rows]
if set_social_metadata is None:
get_social_metadata = config['get_social_metadata']
else:
get_social_metadata = set_social_metadata
from calibre.gui2.metadata import DownloadMetadata from calibre.gui2.metadata import DownloadMetadata
self._download_book_metadata = DownloadMetadata(db, ids, self._download_book_metadata = DownloadMetadata(db, ids,
get_covers=covers, set_metadata=set_metadata, get_covers=covers, set_metadata=set_metadata,
get_social_metadata=config['get_social_metadata']) get_social_metadata=get_social_metadata)
self._download_book_metadata.start() self._download_book_metadata.start()
x = _('covers') if covers and not set_metadata else _('metadata') if set_social_metadata is not None and set_social_metadata:
x = _('social metadata')
else:
x = _('covers') if covers and not set_metadata else _('metadata')
self.progress_indicator.start( self.progress_indicator.start(
_('Downloading %s for %d book(s)')%(x, len(ids))) _('Downloading %s for %d book(s)')%(x, len(ids)))
self._book_metadata_download_check = QTimer(self) self._book_metadata_download_check = QTimer(self)
@ -1744,6 +1755,7 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
if write_settings: if write_settings:
self.write_settings() self.write_settings()
self.check_messages_timer.stop() self.check_messages_timer.stop()
self.update_checker.stop()
self.listener.close() self.listener.close()
self.job_manager.server.close() self.job_manager.server.close()
while self.spare_servers: while self.spare_servers:

View File

@ -60,6 +60,7 @@ class DownloadMetadata(Thread):
self.worker = Worker() self.worker = Worker()
for id in ids: for id in ids:
self.metadata[id] = db.get_metadata(id, index_is_id=True) self.metadata[id] = db.get_metadata(id, index_is_id=True)
self.metadata[id].rating = None
def run(self): def run(self):
self.exception = self.tb = None self.exception = self.tb = None
@ -100,15 +101,28 @@ class DownloadMetadata(Thread):
mi.smart_update(fmi) mi.smart_update(fmi)
if mi.isbn and self.get_social_metadata: if mi.isbn and self.get_social_metadata:
self.social_metadata_exceptions = get_social_metadata(mi) self.social_metadata_exceptions = get_social_metadata(mi)
if mi.rating:
mi.rating *= 2
if not self.get_social_metadata:
mi.tags = []
else: else:
self.failures[id] = (mi.title, self.failures[id] = (mi.title,
_('No matches found for this book')) _('No matches found for this book'))
self.commit_covers() self.commit_covers()
self.commit_covers(True) self.commit_covers(True)
if self.set_metadata: for id in self.fetched_metadata:
for id in self.fetched_metadata: mi = self.metadata[id]
self.db.set_metadata(id, self.metadata[id]) if self.set_metadata:
self.db.set_metadata(id, mi)
if not self.set_metadata and self.get_social_metadata:
if mi.rating:
self.db.set_rating(id, mi.rating)
if mi.tags:
self.db.set_tags(id, mi.tags)
if mi.comments:
self.db.set_comment(id, mi.comments)
self.updated = set(self.fetched_metadata) self.updated = set(self.fetched_metadata)

View File

@ -47,7 +47,10 @@ class TagsView(QTreeView):
ci = self.currentIndex() ci = self.currentIndex()
if not ci.isValid(): if not ci.isValid():
ci = self.indexAt(QPoint(10, 10)) ci = self.indexAt(QPoint(10, 10))
self.model().refresh() try:
self.model().refresh()
except: #Database connection could be closed if an integrity check is happening
pass
if ci.isValid(): if ci.isValid():
self.scrollTo(ci, QTreeView.PositionAtTop) self.scrollTo(ci, QTreeView.PositionAtTop)

View File

@ -111,17 +111,21 @@ def convert_bulk_ebook(parent, queue, db, book_ids, out_format=None, args=[]):
user_recs = cPickle.loads(d.recommendations) user_recs = cPickle.loads(d.recommendations)
book_ids = convert_existing(parent, db, book_ids, output_format) book_ids = convert_existing(parent, db, book_ids, output_format)
return QueueBulk(parent, book_ids, output_format, queue, db, user_recs, args) use_saved_single_settings = d.opt_individual_saved_settings.isChecked()
return QueueBulk(parent, book_ids, output_format, queue, db, user_recs,
args, use_saved_single_settings=use_saved_single_settings)
class QueueBulk(QProgressDialog): class QueueBulk(QProgressDialog):
def __init__(self, parent, book_ids, output_format, queue, db, user_recs, args): def __init__(self, parent, book_ids, output_format, queue, db, user_recs,
args, use_saved_single_settings=True):
QProgressDialog.__init__(self, '', QProgressDialog.__init__(self, '',
QString(), 0, len(book_ids), parent) QString(), 0, len(book_ids), parent)
self.setWindowTitle(_('Queueing books for bulk conversion')) self.setWindowTitle(_('Queueing books for bulk conversion'))
self.book_ids, self.output_format, self.queue, self.db, self.args, self.user_recs = \ self.book_ids, self.output_format, self.queue, self.db, self.args, self.user_recs = \
book_ids, output_format, queue, db, args, user_recs book_ids, output_format, queue, db, args, user_recs
self.parent = parent self.parent = parent
self.use_saved_single_settings = use_saved_single_settings
self.i, self.bad, self.jobs, self.changed = 0, [], [], False self.i, self.bad, self.jobs, self.changed = 0, [], [], False
self.timer = QTimer(self) self.timer = QTimer(self)
self.connect(self.timer, SIGNAL('timeout()'), self.do_book) self.connect(self.timer, SIGNAL('timeout()'), self.do_book)
@ -149,11 +153,12 @@ class QueueBulk(QProgressDialog):
combined_recs = GuiRecommendations() combined_recs = GuiRecommendations()
default_recs = load_defaults('%s_input' % input_format) default_recs = load_defaults('%s_input' % input_format)
specific_recs = load_specifics(self.db, book_id)
for key in default_recs: for key in default_recs:
combined_recs[key] = default_recs[key] combined_recs[key] = default_recs[key]
for key in specific_recs: if self.use_saved_single_settings:
combined_recs[key] = specific_recs[key] specific_recs = load_specifics(self.db, book_id)
for key in specific_recs:
combined_recs[key] = specific_recs[key]
for item in self.user_recs: for item in self.user_recs:
combined_recs[item[0]] = item[1] combined_recs[item[0]] = item[1]
save_specifics(self.db, book_id, combined_recs) save_specifics(self.db, book_id, combined_recs)

View File

@ -3,7 +3,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import traceback import traceback
from PyQt4.QtCore import QThread, SIGNAL from PyQt4.QtCore import QObject, SIGNAL, QTimer
import mechanize import mechanize
from calibre.constants import __version__, iswindows, isosx from calibre.constants import __version__, iswindows, isosx
@ -11,9 +11,21 @@ from calibre import browser
URL = 'http://status.calibre-ebook.com/latest' URL = 'http://status.calibre-ebook.com/latest'
class CheckForUpdates(QThread): class CheckForUpdates(QObject):
def __init__(self, parent):
QObject.__init__(self, parent)
self.timer = QTimer(self)
self.first = True
self.connect(self.timer, SIGNAL('timeout()'), self)
self.start = self.timer.start
self.stop = self.timer.stop
def __call__(self):
if self.first:
self.timer.setInterval(1000*24*60*60)
self.first = False
def run(self):
try: try:
br = browser() br = browser()
req = mechanize.Request(URL) req = mechanize.Request(URL)

View File

@ -9,7 +9,6 @@ Command line interface to the calibre database.

import sys, os, cStringIO
from textwrap import TextWrapper
-from urllib import quote

from calibre import terminal_controller, preferred_encoding, prints
from calibre.utils.config import OptionParser, prefs
@ -48,10 +47,10 @@ XML_TEMPLATE = '''\
<comments>${record['comments']}</comments>
<series py:if="record['series']" index="${record['series_index']}">${record['series']}</series>
<isbn>${record['isbn']}</isbn>
-<cover py:if="record['cover']">${record['cover']}</cover>
+<cover py:if="record['cover']">${record['cover'].replace(os.sep, '/')}</cover>
<formats py:if="record['formats']">
<py:for each="path in record['formats']">
-<format>${path}</format>
+<format>${path.replace(os.sep, '/')}</format>
</py:for>
</formats>
</record>
@ -78,9 +77,9 @@ STANZA_TEMPLATE='''\
<id>urn:calibre:${record['uuid']}</id>
<author><name>${record['author_sort']}</name></author>
<updated>${record['timestamp'].strftime('%Y-%m-%dT%H:%M:%SZ')}</updated>
-<link type="application/epub+zip" href="${quote(record['fmt_epub'].replace(sep, '/')).replace('http%3A', 'http:')}" />
-<link py:if="record['cover']" rel="x-stanza-cover-image" type="image/png" href="${quote(record['cover'].replace(sep, '/')).replace('http%3A', 'http:')}" />
-<link py:if="record['cover']" rel="x-stanza-cover-image-thumbnail" type="image/png" href="${quote(record['cover'].replace(sep, '/')).replace('http%3A', 'http:')}" />
+<link type="application/epub+zip" href="${quote(record['fmt_epub'].replace(sep, '/'))}"/>
+<link py:if="record['cover']" rel="x-stanza-cover-image" type="image/png" href="${quote(record['cover'].replace(sep, '/'))}"/>
+<link py:if="record['cover']" rel="x-stanza-cover-image-thumbnail" type="image/png" href="${quote(record['cover'].replace(sep, '/'))}"/>
<content type="xhtml">
<div xmlns="http://www.w3.org/1999/xhtml">
<py:for each="f in ('authors', 'publisher', 'rating', 'tags', 'series', 'isbn')">
@ -186,8 +185,10 @@ def do_list(db, fields, sort_by, ascending, search_text, line_width, separator,
        return o.getvalue()
    elif output_format == 'xml':
        template = MarkupTemplate(XML_TEMPLATE)
-        return template.generate(data=data).render('xml')
+        return template.generate(data=data, os=os).render('xml')
    elif output_format == 'stanza':
+        def quote(raw):
+            return raw.replace('"', r'\"')
        data = [i for i in data if i.has_key('fmt_epub')]
        for x in data:
            if isinstance(x['fmt_epub'], unicode):

View File

@ -115,7 +115,7 @@ class PostInstall:
        self.info('Creating symlinks...')
        for exe in scripts.keys():
            dest = os.path.join(self.opts.staging_bindir, exe)
-            if os.path.exists(dest):
+            if os.path.lexists(dest):
                os.unlink(dest)
            tgt = os.path.join(getattr(sys, 'frozen_path'), exe)
            self.info('\tSymlinking %s to %s'%(tgt, dest))
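The switch from os.path.exists() to os.path.lexists() matters when the existing symlink is dangling: exists() follows the link and returns False if the target is gone, so the stale link would never be removed, while lexists() checks the link itself. A small standalone illustration (temporary paths, not calibre's install locations):

import os, tempfile

d = tempfile.mkdtemp()
link = os.path.join(d, 'ebook-viewer')
os.symlink(os.path.join(d, 'missing-target'), link)   # dangling symlink

print os.path.exists(link)    # False: follows the link to a target that is not there
print os.path.lexists(link)   # True: the link itself exists

if os.path.lexists(link):     # the check the hunk above switches to
    os.unlink(link)           # now the stale link really gets removed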

View File

@ -421,7 +421,7 @@ button in the individual book conversion dialog.

When you Bulk Convert a set of books, settings are taken in the following order:

* From the defaults set in Preferences->Conversion

-* From the saved conversion settings for each book being converted (if any)
+* From the saved conversion settings for each book being converted (if any). This can be turned off by the option in the top left corner of the Bulk Conversion dialog.

* From the settings set in the Bulk conversion dialog

Note that the final settings for each book in a Bulk Conversion will be saved and re-used if the book is converted again. Since the

View File

@ -81,7 +81,7 @@ Device Integration

What devices does |app| support?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

-At the moment |app| has full support for the SONY PRS 300/500/505/600/700, Cybook Gen 3/Opus, Amazon Kindle 1/2/DX, Netronix EB600, Ectaco Jetbook, BeBook/BeBook Mini, Irex Illiad/DR1000, Foxit eSlick, Android phones and the iPhone. In addition, using the :guilabel:`Save to disk` function you can use it with any ebook reader that exports itself as a USB disk.
+At the moment |app| has full support for the SONY PRS 300/500/505/600/700, Cybook Gen 3/Opus, Amazon Kindle 1/2/DX, Netronix EB600, Ectaco Jetbook, BeBook/BeBook Mini, Irex Illiad/DR1000, Foxit eSlick, PocketBook 360, Android phones and the iPhone. In addition, using the :guilabel:`Save to disk` function you can use it with any ebook reader that exports itself as a USB disk.

How can I help get my device supported in |app|?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

View File

@ -108,7 +108,7 @@ Metadata download plugins

.. class:: calibre.ebooks.metadata.fetch.MetadataSource

    Represents a source to query for metadata. Subclasses must implement
-    at least the fetch method and optionally the is_ok method.
+    at least the fetch method.

    When :meth:`fetch` is called, the `self` object will have the following
    useful attributes (each of which may be None)::
@ -124,8 +124,9 @@ Metadata download plugins

.. automember:: calibre.ebooks.metadata.fetch.MetadataSource.metadata_type

+.. automember:: calibre.ebooks.metadata.fetch.MetadataSource.string_customization_help

.. automethod:: calibre.ebooks.metadata.fetch.MetadataSource.fetch

-.. automethod:: calibre.ebooks.metadata.fetch.MetadataSource.is_ok
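For orientation, a rough sketch of what a minimal metadata source plugin might look like. The query attributes (self.title, self.book_author) and the convention of reporting results via self.results and errors via self.exception are assumptions based on the surrounding documentation, not something this diff shows:

from calibre.ebooks.metadata.fetch import MetadataSource

class ExampleMetadataSource(MetadataSource):  # hypothetical plugin

    name        = 'Example metadata source'   # assumed Plugin-style attributes
    description = 'Queries a made-up web service for book metadata'

    def fetch(self):
        try:
            # Assumed: the search terms are available as attributes on self
            query = self.title or self.book_author or ''
            # ... query the remote service and build MetaInformation objects ...
            self.results = []        # assumed result-reporting convention
        except Exception, e:
            self.exception = e       # assumed error-reporting convention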

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -6,7 +6,7 @@ __docformat__ = 'restructuredtext en'
'''
Manage application-wide preferences.
'''
-import os, re, cPickle, textwrap, traceback
+import os, re, cPickle, textwrap, traceback, plistlib
from copy import deepcopy
from functools import partial
from optparse import OptionParser as _OptionParser
@ -34,9 +34,11 @@ else:

plugin_dir = os.path.join(config_dir, 'plugins')

+CONFIG_DIR_MODE = 0700
+
def make_config_dir():
    if not os.path.exists(plugin_dir):
-        os.makedirs(plugin_dir, mode=448) # 0700 == 448
+        os.makedirs(plugin_dir, mode=CONFIG_DIR_MODE)

def check_config_write_access():
    return os.access(config_dir, os.W_OK) and os.access(config_dir, os.X_OK)
@ -552,6 +554,72 @@ class DynamicConfig(dict):

dynamic = DynamicConfig()

+class XMLConfig(dict):
+
+    '''
+    Similar to :class:`DynamicConfig`, except that it uses an XML storage
+    backend instead of a pickle file.
+
+    See `http://docs.python.org/dev/library/plistlib.html`_ for the supported
+    data types.
+    '''
+
+    def __init__(self, rel_path_to_cf_file):
+        dict.__init__(self)
+        self.file_path = os.path.join(config_dir,
+                *(rel_path_to_cf_file.split('/')))
+        self.file_path = os.path.abspath(self.file_path)
+        if not self.file_path.endswith('.plist'):
+            self.file_path += '.plist'
+        self.refresh()
+
+    def refresh(self):
+        d = {}
+        if os.path.exists(self.file_path):
+            with ExclusiveFile(self.file_path) as f:
+                raw = f.read()
+                try:
+                    d = plistlib.readPlistFromString(raw) if raw.strip() else {}
+                except SystemError:
+                    pass
+                except:
+                    import traceback
+                    traceback.print_exc()
+                    d = {}
+        self.clear()
+        self.update(d)
+
+    def __getitem__(self, key):
+        try:
+            ans = dict.__getitem__(self, key)
+            if isinstance(ans, plistlib.Data):
+                ans = ans.data
+            return ans
+        except KeyError:
+            return None
+
+    def __setitem__(self, key, val):
+        if isinstance(val, (bytes, str)):
+            val = plistlib.Data(val)
+        dict.__setitem__(self, key, val)
+        self.commit()
+
+    def set(self, key, val):
+        self.__setitem__(key, val)
+
+    def commit(self):
+        if hasattr(self, 'file_path') and self.file_path:
+            dpath = os.path.dirname(self.file_path)
+            if not os.path.exists(dpath):
+                os.makedirs(dpath, mode=CONFIG_DIR_MODE)
+            with ExclusiveFile(self.file_path) as f:
+                raw = plistlib.writePlistToString(self)
+                f.seek(0)
+                f.truncate()
+                f.write(raw)
+
def _prefs():
    c = Config('global', 'calibre wide preferences')
    c.add_opt('database_path',
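The new XMLConfig behaves like a dict that persists every assignment to a plist file under the configuration directory and returns None for missing keys. A short usage sketch (the 'example/settings' path is illustrative only):

from calibre.utils.config import XMLConfig

c = XMLConfig('example/settings')   # stored as <config_dir>/example/settings.plist
c['window_size'] = [800, 600]       # committed to disk on every assignment
c['last_profile'] = u'default'      # byte strings would be wrapped in plistlib.Data
print c['window_size']              # [800, 600]
print c['missing_key']              # None rather than a KeyError

c2 = XMLConfig('example/settings')  # a second instance re-reads the same file
print c2['last_profile']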

View File

@ -56,6 +56,8 @@ def _quoteattr(data, entities={}):
    the optional entities parameter. The keys and values must all be
    strings; each key will be replaced with its corresponding value.
    """
+    entities['\n']='&#10;'
+    entities['\r']='&#12;'
    data = _escape(data, entities)
    if '"' in data:
        if "'" in data:

View File

@ -17,7 +17,7 @@
#
# Contributor(s):
#

-TOOLSVERSION = u"ODFPY/0.9.1dev"
+TOOLSVERSION = u"ODFPY/0.9.2dev"

ANIMNS = u"urn:oasis:names:tc:opendocument:xmlns:animation:1.0"
DBNS = u"urn:oasis:names:tc:opendocument:xmlns:database:1.0"

View File

@ -185,7 +185,7 @@ class OpenDocument:
        if self.fontfacedecls.hasChildNodes():
            self.fontfacedecls.toXml(1, xml)
        a = AutomaticStyles()
-        stylelist = self._used_auto_styles([self.styles, self.body])
+        stylelist = self._used_auto_styles([self.styles, self.automaticstyles, self.body])
        if len(stylelist) > 0:
            a.write_open_tag(1, xml)
            for s in stylelist:
@ -233,9 +233,11 @@ class OpenDocument:
        for styleref in ( (DRAWNS,u'style-name'),
                          (DRAWNS,u'text-style-name'),
                          (PRESENTATIONNS,u'style-name'),
-                          (STYLENS,u'style-name'),
+                          (STYLENS,u'data-style-name'),
                          (STYLENS,u'list-style-name'),
                          (STYLENS,u'page-layout-name'),
+                          (STYLENS,u'style-name'),
+                          (TABLENS,u'default-cell-style-name'),
                          (TABLENS,u'style-name'),
                          (TEXTNS,u'style-name') ):
            if e.getAttrNS(styleref[0],styleref[1]):

View File

@ -50,3 +50,5 @@ def Radialgradient(**args):
def Stop(**args):
    return Element(qname = (SVGNS,'stop'), **args)

+def Title(**args):
+    return Element(qname = (SVGNS,'title'), **args)

View File

@ -446,6 +446,9 @@ def SequenceRef(**args):
def SheetName(**args):
    return Element(qname = (TEXTNS,'sheet-name'), **args)

+def SoftPageBreak(**args):
+    return Element(qname = (TEXTNS,'soft-page-break'), **args)
+
def SortKey(**args):
    return Element(qname = (TEXTNS,'sort-key'), **args)
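Both additions follow odfpy's factory-function pattern: each returns an Element bound to a namespace-qualified tag. A tiny sketch that only constructs the new elements and inspects their qualified names (nothing document-specific is assumed):

from odf.text import SoftPageBreak
from odf.svg import Title

print SoftPageBreak().qname   # the (namespace, localname) pair passed to Element
print Title().qname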

View File

@ -1,6 +1,6 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
-# Copyright (C) 2006-2007 Søren Roug, European Environment Agency
+# Copyright (C) 2006-2009 Søren Roug, European Environment Agency
#
# This is free software. You may redistribute it under the terms
# of the Apache license and the GNU General Public License Version
@ -22,16 +22,11 @@
"""Class to show and manipulate user fields in odf documents."""

import sys
-import time
import zipfile
-import xml.sax
-import xml.sax.handler
-import xml.sax.saxutils
-from odf.namespaces import OFFICENS, TEXTNS
-from cStringIO import StringIO
+from odf.text import UserFieldDecl
+from odf.namespaces import OFFICENS
+from odf.opendocument import load

OUTENCODING = "utf-8"
@ -60,16 +55,36 @@
        src ... source document name, file like object or None for stdin
        dest ... destination document name, file like object or None for stdout

        """
        self.src_file = src
        self.dest_file = dest
+        self.document = None
+
+    def loaddoc(self):
+        if isinstance(self.src_file, basestring):
+            # src_file is a filename, check if it is a zip-file
+            if not zipfile.is_zipfile(self.src_file):
+                raise TypeError("%s is no odt file." % self.src_file)
+        elif self.src_file is None:
+            # use stdin if no file given
+            self.src_file = sys.stdin
+        self.document = load(self.src_file)
+
+    def savedoc(self):
+        # write output
+        if self.dest_file is None:
+            # use stdout if no filename given
+            self.document.save('-')
+        else:
+            self.document.save(self.dest_file)

    def list_fields(self):
        """List (extract) all known user-fields.

        Returns list of user-field names.

        """
        return [x[0] for x in self.list_fields_and_values()]
@ -81,15 +96,21 @@
        Returns list of tuples (<field name>, <field type>, <value>).

        """
+        self.loaddoc()
        found_fields = []
-        def _callback(field_name, value_type, value, attrs):
+        all_fields = self.document.getElementsByType(UserFieldDecl)
+        for f in all_fields:
+            value_type = f.getAttribute('valuetype')
+            if value_type == 'string':
+                value = f.getAttribute('stringvalue')
+            else:
+                value = f.getAttribute('value')
+            field_name = f.getAttribute('name')
            if field_names is None or field_name in field_names:
                found_fields.append((field_name.encode(OUTENCODING),
                                     value_type.encode(OUTENCODING),
                                     value.encode(OUTENCODING)))
-            return attrs
-        self._content_handler(_callback)
        return found_fields

    def list_values(self, field_names):
@ -133,199 +154,16 @@
        Returns None

        """
+        self.loaddoc()
+        all_fields = self.document.getElementsByType(UserFieldDecl)
+        for f in all_fields:
+            field_name = f.getAttribute('name')
+            if data.has_key(field_name):
+                value_type = f.getAttribute('valuetype')
+                value = data.get(field_name)
+                if value_type == 'string':
+                    f.setAttribute('stringvalue', value)
+                else:
+                    f.setAttribute('value', value)
+        self.savedoc()
def _callback(field_name, value_type, value, attrs):
if field_name in data:
valattr = VALUE_TYPES.get(value_type)
attrs = dict(attrs.items())
# Take advantage that startElementNS can take a normal
# dict as attrs
attrs[valattr] = data[field_name]
return attrs
self._content_handler(_callback, write_file=True)
def _content_handler(self, callback_func, write_file=False):
"""Handle the content using the callback function and write result if
necessary.
callback_func ... function called for each field found in odf document
signature: field_name ... name of current field
value_type ... type of current field
value ... value of current field
attrs ... tuple of attrs of current field
returns: tuple or dict of attrs
write_file ... boolean telling wether write result to file
"""
class DevNull(object):
"""IO-object which behaves like /dev/null."""
def write(self, str):
pass
# get input
if isinstance(self.src_file, basestring):
# src_file is a filename, check if it is a zip-file
if not zipfile.is_zipfile(self.src_file):
raise TypeError("%s is no odt file." % self.src_file)
elif self.src_file is None:
# use stdin if no file given
self.src_file = sys.stdin
zin = zipfile.ZipFile(self.src_file, 'r')
content_xml = zin.read('content.xml')
# prepare output
if write_file:
output_io = StringIO()
if self.dest_file is None:
# use stdout if no filename given
self.dest_file = sys.stdout
zout = zipfile.ZipFile(self.dest_file, 'w')
else:
output_io = DevNull()
# parse input
odfs = ODFContentParser(callback_func, output_io)
parser = xml.sax.make_parser()
parser.setFeature(xml.sax.handler.feature_namespaces, 1)
parser.setContentHandler(odfs)
parser.parse(StringIO(content_xml))
# write output
if write_file:
# Loop through the input zipfile and copy the content to
# the output until we get to the content.xml. Then
# substitute.
for zinfo in zin.infolist():
if zinfo.filename == "content.xml":
# Write meta
zi = zipfile.ZipInfo("content.xml", time.localtime()[:6])
zi.compress_type = zipfile.ZIP_DEFLATED
zout.writestr(zi, odfs.content())
else:
payload = zin.read(zinfo.filename)
zout.writestr(zinfo, payload)
zout.close()
zin.close()
class ODFContentParser(xml.sax.saxutils.XMLGenerator):
def __init__(self, callback_func, out=None, encoding=OUTENCODING):
"""Constructor.
callback_func ... function called for each field found in odf document
signature: field_name ... name of current field
value_type ... type of current field
value ... value of current field
attrs ... tuple of attrs of current field
returns: tuple or dict of attrs
out ... file like object for output
encoding ... encoding for output
"""
self._callback_func = callback_func
xml.sax.saxutils.XMLGenerator.__init__(self, out, encoding)
def _qname(self, name):
"""Builds a qualified name from a (ns_url, localname) pair"""
if name[0]:
if name[0] == u'http://www.w3.org/XML/1998/namespace':
return u'xml' + ":" + name[1]
# The name is in a non-empty namespace
prefix = self._current_context[name[0]]
if prefix:
# If it is not the default namespace, prepend the prefix
return prefix + ":" + name[1]
# Return the unqualified name
return name[1]
def startElementNS(self, name, qname, attrs):
if name == (TEXTNS, u'user-field-decl'):
field_name = attrs.get((TEXTNS, u'name'))
value_type = attrs.get((OFFICENS, u'value-type'))
if value_type == 'string':
value = attrs.get((OFFICENS, u'string-value'))
else:
value = attrs.get((OFFICENS, u'value'))
attrs = self._callback_func(field_name, value_type, value, attrs)
self._startElementNS(name, qname, attrs)
def _startElementNS(self, name, qname, attrs):
# copy of xml.sax.saxutils.XMLGenerator.startElementNS
# necessary because we have to provide our own writeattr
# function which is called by this method
if name[0] is None:
name = name[1]
elif self._current_context[name[0]] is None:
# default namespace
name = name[1]
else:
name = self._current_context[name[0]] + ":" + name[1]
self._out.write('<' + name)
for k,v in self._undeclared_ns_maps:
if k is None:
self._out.write(' xmlns="%s"' % (v or ''))
else:
self._out.write(' xmlns:%s="%s"' % (k,v))
self._undeclared_ns_maps = []
for (name, value) in attrs.items():
if name[0] is None:
name = name[1]
elif self._current_context[name[0]] is None:
# default namespace
#If an attribute has a nsuri but not a prefix, we must
#create a prefix and add a nsdecl
prefix = self.GENERATED_PREFIX % self._generated_prefix_ctr
self._generated_prefix_ctr = self._generated_prefix_ctr + 1
name = prefix + ':' + name[1]
self._out.write(' xmlns:%s=%s' % (prefix, quoteattr(name[0])))
self._current_context[name[0]] = prefix
else:
name = self._current_context[name[0]] + ":" + name[1]
self._out.write(' %s=' % name)
writeattr(self._out, value)
self._out.write('>')
def content(self):
return self._out.getvalue()
ATTR_ENTITIES = {
'\n': '&#x0a;' # convert newlines into entities inside attributes
}
def writetext(stream, text, entities={}):
text = xml.sax.saxutils.escape(text, entities)
try:
stream.write(text)
except UnicodeError:
for c in text:
try:
stream.write(c)
except UnicodeError:
stream.write(u"&#%d;" % ord(c))
def writeattr(stream, text):
# copied from xml.sax.saxutils.writeattr added support for an
# additional entity mapping
countdouble = text.count('"')
entities = ATTR_ENTITIES.copy()
if countdouble:
countsingle = text.count("'")
if countdouble <= countsingle:
entities['"'] = "&quot;"
quote = '"'
else:
entities["'"] = "&apos;"
quote = "'"
else:
quote = '"'
stream.write(quote)
writetext(stream, text, entities)
stream.write(quote)
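After this refactor UserFields loads the document through odf.opendocument.load and writes it back with save, instead of streaming content.xml through a SAX filter. A short usage sketch (the file names are examples, and the last hunk is assumed to be the body of UserFields.update, as the surrounding code suggests):

from odf.userfield import UserFields

fields = UserFields('template.odt', 'filled.odt')
print fields.list_fields()                    # names of all user-field declarations
print fields.list_fields_and_values(None)     # (name, value-type, value) tuples
fields.update({'customer': 'ACME Corp.'})     # set a value and write filled.odt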