Merge from trunk

This commit is contained in:
Charles Haley 2013-05-24 15:56:52 +02:00
commit 00c1d4ea54
125 changed files with 33062 additions and 27652 deletions

View File

@ -20,6 +20,56 @@
# new recipes: # new recipes:
# - title: # - title:
- version: 0.9.32
date: 2013-05-24
new features:
- title: "Show the number of currently selected books in the status bar at the bottom of the book list"
- title: "Driver for PocketBook Touch 623 and Yarvik tablet Xenta 13c"
tickets: [1182850, 1181669]
- title: "When editing dates such as published, allow pressing the minus key to clear the date and the = key to set the date to today."
tickets: [1181449]
bug fixes:
- title: "EPUB/AZW3 Output: Fix regression that caused errors when trying to convert documents that have URLs with invalid (non-utf-8) quoting."
tickets: [1181049]
- title: "When backing up metadata automatically remove XML invalid chars, instead of erroring out"
- title: "ebook-viewer: Fix --debug-javascript option causing an error when running from a binary build on OS X and Linux"
- title: "Fix switch library dialog and menu both popping up when clicking the library button in some window managers"
- title: "Apple driver: Fix a regression in 0.9.31 that could cause sending books to the device to hang"
- title: "When setting metadata using the edit metadata dialog, convert newlines, tabs etc. to normal spaces"
tickets: [1182268]
- title: "EPUB/AZW3 Output: Fix pages that contain only an svg image being regarded as empty and removed during splitting"
- title: "AZW3 Input: Handle files that use unnecessary svg: prefixes."
tickets: [1182257]
- title: "EPUB Input: Handle EPUB files that have no <metadata> section in their OPF."
tickets: [1181546]
- title: "Get Books: Fix Foyles UK store plugin."
tickets: [1181494]
improved recipes:
- Wall Street Journal
- Various Polish news sources
- Handelsblatt
- The Australian
- Las Vegas Review
- NME
new recipes:
- title: WirtschaftsWoche Online
author: Hegi
- version: 0.9.31 - version: 0.9.31
date: 2013-05-17 date: 2013-05-17

View File

@ -57,6 +57,26 @@ library. The virtual library will then be created based on the search
you just typed in. Searches are very powerful, for examples of the kinds you just typed in. Searches are very powerful, for examples of the kinds
of things you can do with them, see :ref:`search_interface`. of things you can do with them, see :ref:`search_interface`.
Examples of useful Virtual Libraries
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
* Books added to |app| in the last day::
date:>1daysago
* Books added to |app| in the last month::
date:>30daysago
* Books with a rating of 5 stars::
rating:5
* Books with a rating of at least 4 stars::
rating:>=4
* Books with no rating::
rating:false
* Periodicals downloaded by the Fetch News function in |app|::
tags:=News and author:=calibre
* Books with no tags::
tags:false
* Books with no covers::
cover:false
Working with Virtual Libraries Working with Virtual Libraries
------------------------------------- -------------------------------------

View File

@ -1,47 +1,24 @@
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
import re
class Adventure_zone(BasicNewsRecipe): class Adventure_zone(BasicNewsRecipe):
title = u'Adventure Zone' title = u'Adventure Zone'
__author__ = 'fenuks' __author__ = 'fenuks'
description = u'Czytaj więcej o przygodzie - codzienne nowinki. Szukaj u nas solucji i poradników, czytaj recenzje i zapowiedzi. Także galeria, pliki oraz forum dla wszystkich fanów gier przygodowych.' description = u'Czytaj więcej o przygodzie - codzienne nowinki. Szukaj u nas solucji i poradników, czytaj recenzje i zapowiedzi. Także galeria, pliki oraz forum dla wszystkich fanów gier przygodowych.'
category = 'games' category = 'games'
language = 'pl' language = 'pl'
BASEURL = 'http://www.adventure-zone.info/fusion/'
no_stylesheets = True no_stylesheets = True
extra_css = '.image {float: left; margin-right: 5px;}'
oldest_article = 20 oldest_article = 20
max_articles_per_feed = 100 max_articles_per_feed = 100
cover_url = 'http://www.adventure-zone.info/inne/logoaz_2012.png' cover_url = 'http://www.adventure-zone.info/inne/logoaz_2012.png'
index = 'http://www.adventure-zone.info/fusion/' remove_attributes = ['style']
use_embedded_content = False use_embedded_content = False
preprocess_regexps = [(re.compile(r"<td class='capmain'>Komentarze</td>", re.IGNORECASE), lambda m: ''), keep_only_tags = [dict(attrs={'class':'content'})]
(re.compile(r'</?table.*?>'), lambda match: ''), remove_tags = [dict(attrs={'class':'footer'})]
(re.compile(r'</?tbody.*?>'), lambda match: '')] feeds = [(u'Nowinki', u'http://www.adventure-zone.info/fusion/rss/index.php')]
remove_tags_before = dict(name='td', attrs={'class':'main-bg'})
remove_tags = [dict(name='img', attrs={'alt':'Drukuj'})]
remove_tags_after = dict(id='comments')
extra_css = '.main-bg{text-align: left;} td.capmain{ font-size: 22px; } img.news-category {float: left; margin-right: 5px;}'
feeds = [(u'Nowinki', u'http://www.adventure-zone.info/fusion/feeds/news.php')]
'''def get_cover_url(self):
soup = self.index_to_soup('http://www.adventure-zone.info/fusion/news.php')
cover=soup.find(id='box_OstatninumerAZ')
self.cover_url='http://www.adventure-zone.info/fusion/'+ cover.center.a.img['src']
return getattr(self, 'cover_url', self.cover_url)'''
def populate_article_metadata(self, article, soup, first):
result = re.search('(.+) - Adventure Zone', soup.title.string)
if result:
result = result.group(1)
else:
result = soup.body.find('strong')
if result:
result = result.string
if result:
result = result.replace('&amp;', '&')
result = result.replace('&#39;', '')
article.title = result
def skip_ad_pages(self, soup): def skip_ad_pages(self, soup):
skip_tag = soup.body.find(name='td', attrs={'class':'main-bg'}) skip_tag = soup.body.find(attrs={'class':'content'})
skip_tag = skip_tag.findAll(name='a') skip_tag = skip_tag.findAll(name='a')
title = soup.title.string.lower() title = soup.title.string.lower()
if (('zapowied' in title) or ('recenzj' in title) or ('solucj' in title) or ('poradnik' in title)): if (('zapowied' in title) or ('recenzj' in title) or ('solucj' in title) or ('poradnik' in title)):
@ -49,20 +26,10 @@ class Adventure_zone(BasicNewsRecipe):
if r.strong and r.strong.string: if r.strong and r.strong.string:
word=r.strong.string.lower() word=r.strong.string.lower()
if (('zapowied' in word) or ('recenzj' in word) or ('solucj' in word) or ('poradnik' in word)): if (('zapowied' in word) or ('recenzj' in word) or ('solucj' in word) or ('poradnik' in word)):
return self.index_to_soup('http://www.adventure-zone.info/fusion/print.php?type=A&item'+r['href'][r['href'].find('article_id')+7:], raw=True) return self.index_to_soup(self.BASEURL+r['href'], raw=True)
def preprocess_html(self, soup): def preprocess_html(self, soup):
footer=soup.find(attrs={'class':'news-footer middle-border'}) for link in soup.findAll('a', href=True):
r = soup.find(name='td', attrs={'class':'capmain'}) if not link['href'].startswith('http'):
if r: link['href'] = self.BASEURL + link['href']
r.name='h1'
for item in soup.findAll(name=['tr', 'td']):
item.name='div'
if footer and len(footer('a'))>=2:
footer('a')[1].extract()
for item in soup.findAll(style=True):
del item['style']
for a in soup('a'):
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
a['href']=self.index + a['href']
return soup return soup

View File

@ -13,6 +13,7 @@ class Astroflesz(BasicNewsRecipe):
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
remove_empty_feeds = True
remove_attributes = ['style'] remove_attributes = ['style']
keep_only_tags = [dict(id="k2Container")] keep_only_tags = [dict(id="k2Container")]
remove_tags_after = dict(name='div', attrs={'class':'itemLinks'}) remove_tags_after = dict(name='div', attrs={'class':'itemLinks'})

View File

@ -6,12 +6,10 @@ __copyright__ = '2011, Piotr Kontek, piotr.kontek@gmail.com \
2013, Tomasz Długosz, tomek3d@gmail.com' 2013, Tomasz Długosz, tomek3d@gmail.com'
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ptempfile import PersistentTemporaryFile
from datetime import date
import re import re
from lxml import html
class GN(BasicNewsRecipe): class GN(BasicNewsRecipe):
EDITION = 0
__author__ = 'Piotr Kontek, Tomasz Długosz' __author__ = 'Piotr Kontek, Tomasz Długosz'
title = u'Gość Niedzielny' title = u'Gość Niedzielny'
@ -20,83 +18,23 @@ class GN(BasicNewsRecipe):
no_stylesheets = True no_stylesheets = True
language = 'pl' language = 'pl'
remove_javascript = True remove_javascript = True
temp_files = []
articles_are_obfuscated = True def find_last_issue(self):
raw = self.index_to_soup('http://gosc.pl/wyszukaj/wydania/3.Gosc-Niedzielny/', raw=True)
doc = html.fromstring(raw)
page = doc.xpath('//div[@class="c"]//div[@class="search-result"]/div[1]/div[2]/h1//a/@href')
def get_obfuscated_article(self, url): return page[1]
br = self.get_browser()
br.open(url)
source = br.response().read()
page = self.index_to_soup(source)
main_section = page.find('div',attrs={'class':'txt doc_prnt_prv'})
title = main_section.find('h2')
info = main_section.find('div', attrs={'class' : 'cf doc_info'})
authors = info.find(attrs={'class':'l'})
article = str(main_section.find('p', attrs={'class' : 'doc_lead'}))
first = True
for p in main_section.findAll('p', attrs={'class':None}, recursive=False):
if first and p.find('img') != None:
article += '<p>'
article += str(p.find('img')).replace('src="/files/','src="http://www.gosc.pl/files/')
article += '<font size="-2">'
for s in p.findAll('span'):
article += self.tag_to_string(s)
article += '</font></p>'
else:
article += str(p).replace('src="/files/','src="http://www.gosc.pl/files/')
first = False
limiter = main_section.find('p', attrs={'class' : 'limiter'})
if limiter:
article += str(limiter)
html = unicode(title)
#sometimes authors are not filled in:
if authors:
html += unicode(authors) + unicode(article)
else:
html += unicode(article)
self.temp_files.append(PersistentTemporaryFile('_temparse.html'))
self.temp_files[-1].write(html)
self.temp_files[-1].close()
return self.temp_files[-1].name
def find_last_issue(self, year):
soup = self.index_to_soup('http://gosc.pl/wyszukaj/wydania/3.Gosc-Niedzielny/rok/' + str(year))
#szukam zdjęcia i linka do poprzedniego pełnego numeru
first = True
for d in soup.findAll('div', attrs={'class':'l release_preview_l'}):
img = d.find('img')
if img != None:
a = img.parent
self.EDITION = a['href']
#this was preventing kindles from moving old issues to 'Back Issues' category:
#self.title = img['alt']
self.cover_url = 'http://www.gosc.pl' + img['src']
if year != date.today().year or not first:
break
first = False
def parse_index(self): def parse_index(self):
year = date.today().year soup = self.index_to_soup('http://gosc.pl' + self.find_last_issue())
self.find_last_issue(year)
##jeśli to pierwszy numer w roku trzeba pobrać poprzedni rok
if self.EDITION == 0:
self.find_last_issue(year-1)
soup = self.index_to_soup('http://www.gosc.pl' + self.EDITION)
feeds = [] feeds = []
#wstepniak #wstepniak
a = soup.find('div',attrs={'class':'release-wp-b'}).find('a') a = soup.find('div',attrs={'class':'release-wp-b'}).find('a')
articles = [ articles = [
{'title' : self.tag_to_string(a), {'title' : self.tag_to_string(a),
'url' : 'http://www.gosc.pl' + a['href'].replace('/doc/','/doc_pr/'), 'url' : 'http://www.gosc.pl' + a['href'].replace('/doc/','/doc_pr/')
'date' : '', }]
'description' : ''}
]
feeds.append((u'Wstępniak',articles)) feeds.append((u'Wstępniak',articles))
#kategorie #kategorie
for addr in soup.findAll('a',attrs={'href':re.compile('kategoria')}): for addr in soup.findAll('a',attrs={'href':re.compile('kategoria')}):
@ -113,16 +51,46 @@ class GN(BasicNewsRecipe):
art = a.find('a') art = a.find('a')
yield { yield {
'title' : self.tag_to_string(art), 'title' : self.tag_to_string(art),
'url' : 'http://www.gosc.pl' + art['href'].replace('/doc/','/doc_pr/'), 'url' : 'http://www.gosc.pl' + art['href']
'date' : '',
'description' : ''
} }
for a in main_block.findAll('div', attrs={'class':'sr-document'}): for a in main_block.findAll('div', attrs={'class':'sr-document'}):
art = a.find('a') art = a.find('a')
yield { yield {
'title' : self.tag_to_string(art), 'title' : self.tag_to_string(art),
'url' : 'http://www.gosc.pl' + art['href'].replace('/doc/','/doc_pr/'), 'url' : 'http://www.gosc.pl' + art['href']
'date' : '',
'description' : ''
} }
def append_page(self, soup, appendtag):
chpage= appendtag.find(attrs={'class':'pgr_nrs'})
if chpage:
for page in chpage.findAll('a'):
soup2 = self.index_to_soup('http://gosc.pl' + page['href'])
pagetext = soup2.find(attrs={'class':'intextAd'})
pos = len(appendtag.contents)
appendtag.insert(pos, pagetext)
def preprocess_html(self, soup):
self.append_page(soup, soup.body)
'''
for image_div in soup.findAll(attrs={'class':'doc_image'}):
link =
if 'm.jpg' in image['src']:
image['src'] = image['src'].replace('m.jpg', '.jpg')
'''
return soup
keep_only_tags = [
dict(name='div', attrs={'class':'cf txt'})
]
remove_tags = [
dict(name='p', attrs={'class':['r tr', 'l l-2', 'wykop']}),
dict(name='div', attrs={'class':['doc_actions', 'pgr', 'fr1_cl']}),
dict(name='div', attrs={'id':'vote'})
]
extra_css = '''
h1 {font-size:150%}
div#doc_image {font-style:italic; font-size:70%}
p.limiter {font-size:150%; font-weight: bold}
'''

View File

@ -1,16 +1,61 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Handelsblatt(BasicNewsRecipe): class Handelsblatt(BasicNewsRecipe):
title = u'Handelsblatt' title = u'Handelsblatt'
__author__ = 'malfi' __author__ = 'malfi' # modified by Hegi, last change 2013-05-20
description = u'Handelsblatt - basierend auf den RRS-Feeds von Handelsblatt.de'
tags = 'Nachrichten, Blog, Wirtschaft'
publisher = 'Verlagsgruppe Handelsblatt GmbH'
category = 'business, economy, news, Germany'
publication_type = 'daily newspaper'
language = 'de_DE'
oldest_article = 7 oldest_article = 7
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True simultaneous_downloads= 20
# cover_url = 'http://www.handelsblatt.com/images/logo/logo_handelsblatt.com.png'
language = 'de'
remove_tags_before = dict(attrs={'class':'hcf-overline'}) auto_cleanup = False
remove_tags_after = dict(attrs={'class':'hcf-footer'}) no_stylesheets = True
remove_javascript = True
remove_empty_feeds = True
# don't duplicate articles from "Schlagzeilen" / "Exklusiv" to other rubrics
ignore_duplicate_articles = {'title', 'url'}
# if you want to reduce size for a b/w or E-ink device, uncomment this:
# compress_news_images = True
# compress_news_images_auto_size = 16
# scale_news_images = (400,300)
timefmt = ' [%a, %d %b %Y]'
conversion_options = {'smarten_punctuation' : True,
'authors' : publisher,
'publisher' : publisher}
language = 'de_DE'
encoding = 'UTF-8'
cover_source = 'http://www.handelsblatt-shop.com/epaper/482/'
# masthead_url = 'http://www.handelsblatt.com/images/hb_logo/6543086/1-format3.jpg'
masthead_url = 'http://www.handelsblatt-chemie.de/wp-content/uploads/2012/01/hb-logo.gif'
def get_cover_url(self):
cover_source_soup = self.index_to_soup(self.cover_source)
preview_image_div = cover_source_soup.find(attrs={'class':'vorschau'})
return 'http://www.handelsblatt-shop.com'+preview_image_div.a.img['src']
# remove_tags_before = dict(attrs={'class':'hcf-overline'})
# remove_tags_after = dict(attrs={'class':'hcf-footer'})
# Alternatively use this:
keep_only_tags = [
dict(name='div', attrs={'class':['hcf-column hcf-column1 hcf-teasercontainer hcf-maincol']}),
dict(name='div', attrs={'id':['contentMain']})
]
remove_tags = [
dict(name='div', attrs={'class':['hcf-link-block hcf-faq-open', 'hcf-article-related']})
]
feeds = [ feeds = [
(u'Handelsblatt Exklusiv',u'http://www.handelsblatt.com/rss/exklusiv'), (u'Handelsblatt Exklusiv',u'http://www.handelsblatt.com/rss/exklusiv'),
@ -25,15 +70,19 @@ class Handelsblatt(BasicNewsRecipe):
(u'Handelsblatt Weblogs',u'http://www.handelsblatt.com/rss/blogs') (u'Handelsblatt Weblogs',u'http://www.handelsblatt.com/rss/blogs')
] ]
extra_css = ''' # Insert ". " after "Place" in <span class="hcf-location-mark">Place</span>
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;} # If you use .epub format you could also do this as extra_css '.hcf-location-mark:after {content: ". "}'
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;} preprocess_regexps = [(re.compile(r'(<span class="hcf-location-mark">[^<]*)(</span>)',
p{font-family:Arial,Helvetica,sans-serif;font-size:small;} re.DOTALL|re.IGNORECASE), lambda match: match.group(1) + '. ' + match.group(2))]
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
''' extra_css = 'h1 {font-size: 1.6em; text-align: left} \
h2 {font-size: 1em; font-style: italic; font-weight: normal} \
h3 {font-size: 1.3em;text-align: left} \
h4, h5, h6, a {font-size: 1em;text-align: left} \
.hcf-caption {font-size: 1em;text-align: left; font-style: italic} \
.hcf-location-mark {font-style: italic}'
def print_version(self, url): def print_version(self, url):
url = url.split('/') main, sep, id = url.rpartition('/')
url[-1] = 'v_detail_tab_print,'+url[-1] return main + '/v_detail_tab_print/' + id
url = '/'.join(url)
return url

View File

@ -13,11 +13,12 @@ class Histmag(BasicNewsRecipe):
__author__ = 'matek09' __author__ = 'matek09'
description = u"Artykuly historyczne i publicystyczne" description = u"Artykuly historyczne i publicystyczne"
encoding = 'utf-8' encoding = 'utf-8'
extra_css = '''.center img {display: block;}'''
#preprocess_regexps = [(re.compile(r'</span>'), lambda match: '</span><br><br>'),(re.compile(r'<span>'), lambda match: '<br><br><span>')] #preprocess_regexps = [(re.compile(r'</span>'), lambda match: '</span><br><br>'),(re.compile(r'<span>'), lambda match: '<br><br><span>')]
no_stylesheets = True no_stylesheets = True
language = 'pl' language = 'pl'
remove_javascript = True remove_javascript = True
keep_only_tags=[dict(id='article')] keep_only_tags=[dict(id='article')]
remove_tags=[dict(name = 'p', attrs = {'class' : 'article-tags'})] remove_tags=[dict(name = 'p', attrs = {'class' : 'article-tags'}), dict(attrs={'class':'twitter-share-button'})]
feeds = [(u'Wszystkie', u'http://histmag.org/rss/wszystkie.xml'), (u'Wydarzenia', u'http://histmag.org/rss/wydarzenia.xml'), (u'Recenzje', u'http://histmag.org/rss/recenzje.xml'), (u'Artykuły historyczne', u'http://histmag.org/rss/historia.xml'), (u'Publicystyka', u'http://histmag.org/rss/publicystyka.xml')] feeds = [(u'Wszystkie', u'http://histmag.org/rss/wszystkie.xml'), (u'Wydarzenia', u'http://histmag.org/rss/wydarzenia.xml'), (u'Recenzje', u'http://histmag.org/rss/recenzje.xml'), (u'Artykuły historyczne', u'http://histmag.org/rss/historia.xml'), (u'Publicystyka', u'http://histmag.org/rss/publicystyka.xml')]

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.5 KiB

BIN
recipes/icons/gs24_pl.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 428 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 541 B

BIN
recipes/icons/pc_lab.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 697 B

BIN
recipes/icons/polityka.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 346 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 418 B

View File

@ -20,7 +20,7 @@ class OSNewsRecipe(BasicNewsRecipe):
remove_javascript = True remove_javascript = True
encoding = 'utf-8' encoding = 'utf-8'
use_embedded_content = False; use_embedded_content = False;
remove_empty_feeds = True
oldest_article = 7 oldest_article = 7
max_articles_per_feed = 100 max_articles_per_feed = 100
cover_url='http://osnews.pl/wp-content/themes/osnews/img/logo.png' cover_url='http://osnews.pl/wp-content/themes/osnews/img/logo.png'
@ -31,22 +31,18 @@ class OSNewsRecipe(BasicNewsRecipe):
''' '''
feeds = [ feeds = [
(u'OSNews.pl', u'http://feeds.feedburner.com/OSnewspl') (u'Niusy', u'http://feeds.feedburner.com/OSnewspl'),
(u'Wylęgarnia', u'http://feeds.feedburner.com/osnewspl_nowe')
] ]
keep_only_tags = [ keep_only_tags = [
dict(name = 'a', attrs = {'class' : 'news-heading'}), dict(name = 'div', attrs = {'id' : 'content'})
dict(name = 'div', attrs = {'class' : 'newsinformations'}),
dict(name = 'div', attrs = {'id' : 'news-content'})
] ]
remove_tags = [ remove_tags = [
dict(name = 'div', attrs = {'class' : 'sociable'}), dict(name = 'div', attrs = {'class' : ['newstags', 'tw_button', 'post_prev']}),
dict(name = 'div', attrs = {'class' : 'post_prev'}), dict(name = 'div', attrs = {'id' : 'newspage_upinfo'}),
dict(name = 'div', attrs = {'class' : 'post_next'}),
dict(name = 'div', attrs = {'class' : 'clr'}),
dict(name = 'div', attrs = {'class' : 'tw_button'}),
dict(name = 'div', attrs = {'style' : 'width:56px;height:60px;float:left;margin-right:10px'})
] ]
preprocess_regexps = [(re.compile(u'</span>Komentarze: \(?[0-9]+\)? ?<span'), lambda match: '</span><span')] remove_tags_after = dict(name = 'div', attrs = {'class' : 'post_prev'})
preprocess_regexps = [(re.compile(u'</span>Komentarze: \(?[0-9]+\)? ?<span'), lambda match: '</span><span'), (re.compile(u'<iframe.+?</iframe>'), lambda match: '')]

View File

@ -0,0 +1,86 @@
__license__ = 'GPL v3'
__copyright__ = '2013, Armin Geller'
'''
Fetch WirtschaftsWoche Online
'''
import re
# import time
from calibre.web.feeds.news import BasicNewsRecipe
class WirtschaftsWocheOnline(BasicNewsRecipe):
    """Recipe that builds an issue of WirtschaftsWoche Online from its RSS feeds.

    Articles are taken from the wiwo.de RSS feeds listed in ``feeds`` and
    downloaded through their print view (see ``print_version``). The cover
    image is scraped from the publisher's shop page (see ``get_cover_url``).
    """

    title = u'WirtschaftsWoche Online'
    __author__ = 'Hegi'  # Update AGE 2013-01-05; Modified by Hegi 2013-04-28
    description = u'Wirtschaftswoche Online - basierend auf den RSS-Feeds von Wiwo.de'
    tags = 'Nachrichten, Blog, Wirtschaft'
    publisher = 'Verlagsgruppe Handelsblatt GmbH / Redaktion WirtschaftsWoche Online'
    category = 'business, economy, news, Germany'
    publication_type = 'weekly magazine'
    # NOTE: the original assigned language twice ('de', then 'de_DE'); only
    # the later assignment ever took effect, so just that one is kept.
    language = 'de_DE'
    encoding = 'UTF-8'

    oldest_article = 7
    max_articles_per_feed = 100
    simultaneous_downloads = 20

    auto_cleanup = False
    no_stylesheets = True
    remove_javascript = True
    remove_empty_feeds = True

    # don't duplicate articles from "Schlagzeilen" / "Exklusiv" to other rubrics
    ignore_duplicate_articles = {'title', 'url'}

    # if you want to reduce size for a b/w or E-ink device, uncomment this:
    # compress_news_images = True
    # compress_news_images_auto_size = 16
    # scale_news_images = (400,300)

    timefmt = ' [%a, %d %b %Y]'

    conversion_options = {
        'smarten_punctuation': True,
        'authors': publisher,
        'publisher': publisher,
    }

    # Shop page that carries a preview image of the current issue.
    cover_source = 'http://www.wiwo-shop.de/wirtschaftswoche/wirtschaftswoche-emagazin-p1952.html'
    masthead_url = 'http://www.wiwo.de/images/wiwo_logo/5748610/1-formatOriginal.png'

    def get_cover_url(self):
        """Return the absolute URL of the current cover image, or None.

        The shop page named by ``cover_source`` is fetched and the first
        ``<img>`` inside the first link of the element with class
        ``container vorschau`` is used. If that structure is not found
        (for example after a redesign of the shop page), None is returned
        instead of raising.
        """
        cover_source_soup = self.index_to_soup(self.cover_source)
        preview_image_div = cover_source_soup.find(attrs={'class': 'container vorschau'})
        try:
            return 'http://www.wiwo-shop.de' + preview_image_div.a.img['src']
        except (AttributeError, KeyError, TypeError):
            # Any link of the div -> a -> img -> src chain is missing.
            # NOTE(review): None is assumed to mean "no cover" to the
            # caller -- confirm against BasicNewsRecipe.
            return None

    # Insert ". " after "Place" in <span class="hcf-location-mark">Place</span>
    # If you use .epub format you could also do this as extra_css '.hcf-location-mark:after {content: ". "}'
    preprocess_regexps = [
        (re.compile(r'(<span class="hcf-location-mark">[^<]*)(</span>)',
                    re.DOTALL|re.IGNORECASE),
         lambda match: match.group(1) + '. ' + match.group(2)),
    ]

    extra_css = (
        'h1 {font-size: 1.6em; text-align: left} '
        'h2 {font-size: 1em; font-style: italic; font-weight: normal} '
        'h3 {font-size: 1.3em;text-align: left} '
        'h4, h5, h6, a {font-size: 1em;text-align: left} '
        '.hcf-caption {font-size: 1em;text-align: left; font-style: italic} '
        '.hcf-location-mark {font-style: italic}'
    )

    keep_only_tags = [
        dict(name='div', attrs={'class':['hcf-column hcf-column1 hcf-teasercontainer hcf-maincol']}),
        dict(name='div', attrs={'id':['contentMain']}),
    ]

    remove_tags = [
        dict(name='div', attrs={'class':['hcf-link-block hcf-faq-open', 'hcf-article-related']}),
    ]

    feeds = [
        (u'Schlagzeilen', u'http://www.wiwo.de/contentexport/feed/rss/schlagzeilen'),
        (u'Exklusiv', u'http://www.wiwo.de/contentexport/feed/rss/exklusiv'),
        # (u'Themen', u'http://www.wiwo.de/contentexport/feed/rss/themen'), # AGE no print version
        (u'Unternehmen', u'http://www.wiwo.de/contentexport/feed/rss/unternehmen'),
        (u'Finanzen', u'http://www.wiwo.de/contentexport/feed/rss/finanzen'),
        (u'Politik', u'http://www.wiwo.de/contentexport/feed/rss/politik'),
        (u'Erfolg', u'http://www.wiwo.de/contentexport/feed/rss/erfolg'),
        (u'Technologie', u'http://www.wiwo.de/contentexport/feed/rss/technologie'),
        # (u'Green-WiWo', u'http://green.wiwo.de/feed/rss/') # AGE no print version
    ]

    def print_version(self, url):
        """Return the print-view URL for the article at ``url``.

        ``/v_detail_tab_print`` is inserted in front of the last path
        segment of ``url``.
        """
        # 'article_id' rather than 'id', to avoid shadowing the builtin.
        main, _sep, article_id = url.rpartition('/')
        return main + '/v_detail_tab_print/' + article_id

View File

@ -112,7 +112,7 @@ class WallStreetJournal(BasicNewsRecipe):
if date is not None: if date is not None:
self.timefmt = ' [%s]'%self.tag_to_string(date) self.timefmt = ' [%s]'%self.tag_to_string(date)
cov = soup.find('div', attrs={'class':'itpSectionHeaderPdf'}) cov = soup.find('div', attrs={'class':lambda x: x and 'itpSectionHeaderPdf' in x.split()})
if cov is not None: if cov is not None:
a = cov.find('a', href=True) a = cov.find('a', href=True)
if a is not None: if a is not None:

View File

@ -13,14 +13,14 @@ msgstr ""
"Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-" "Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
"devel@lists.alioth.debian.org>\n" "devel@lists.alioth.debian.org>\n"
"POT-Creation-Date: 2011-11-25 14:01+0000\n" "POT-Creation-Date: 2011-11-25 14:01+0000\n"
"PO-Revision-Date: 2013-03-23 10:17+0000\n" "PO-Revision-Date: 2013-05-21 06:13+0000\n"
"Last-Translator: Глория Хрусталёва <gloriya@hushmail.com>\n" "Last-Translator: Глория Хрусталёва <gloriya@hushmail.com>\n"
"Language-Team: Russian <debian-l10n-russian@lists.debian.org>\n" "Language-Team: Russian <debian-l10n-russian@lists.debian.org>\n"
"MIME-Version: 1.0\n" "MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n" "Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n" "Content-Transfer-Encoding: 8bit\n"
"X-Launchpad-Export-Date: 2013-03-24 04:45+0000\n" "X-Launchpad-Export-Date: 2013-05-22 04:38+0000\n"
"X-Generator: Launchpad (build 16540)\n" "X-Generator: Launchpad (build 16626)\n"
"Language: ru\n" "Language: ru\n"
#. name for aaa #. name for aaa
@ -5361,7 +5361,7 @@ msgstr ""
#. name for coa #. name for coa
msgid "Malay; Cocos Islands" msgid "Malay; Cocos Islands"
msgstr "" msgstr "Малайский; Кокосовые острова"
#. name for cob #. name for cob
msgid "Chicomuceltec" msgid "Chicomuceltec"

View File

@ -30,14 +30,14 @@ msgstr ""
"Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-" "Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
"devel@lists.alioth.debian.org>\n" "devel@lists.alioth.debian.org>\n"
"POT-Creation-Date: 2011-11-25 14:01+0000\n" "POT-Creation-Date: 2011-11-25 14:01+0000\n"
"PO-Revision-Date: 2013-05-13 05:58+0000\n" "PO-Revision-Date: 2013-05-19 09:23+0000\n"
"Last-Translator: Merarom <Unknown>\n" "Last-Translator: Merarom <Unknown>\n"
"Language-Team: Swedish <sv@li.org>\n" "Language-Team: Swedish <sv@li.org>\n"
"MIME-Version: 1.0\n" "MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n" "Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n" "Content-Transfer-Encoding: 8bit\n"
"X-Launchpad-Export-Date: 2013-05-14 05:30+0000\n" "X-Launchpad-Export-Date: 2013-05-20 05:34+0000\n"
"X-Generator: Launchpad (build 16617)\n" "X-Generator: Launchpad (build 16626)\n"
"Language: sv\n" "Language: sv\n"
#. name for aaa #. name for aaa
@ -4582,35 +4582,35 @@ msgstr ""
#. name for bzl #. name for bzl
msgid "Boano (Sulawesi)" msgid "Boano (Sulawesi)"
msgstr "" msgstr "Boano (Sulawesi/Celebes)"
#. name for bzm #. name for bzm
msgid "Bolondo" msgid "Bolondo"
msgstr "" msgstr "Bolondo"
#. name for bzn #. name for bzn
msgid "Boano (Maluku)" msgid "Boano (Maluku)"
msgstr "" msgstr "Boano (Maluku)"
#. name for bzo #. name for bzo
msgid "Bozaba" msgid "Bozaba"
msgstr "" msgstr "Bozaba"
#. name for bzp #. name for bzp
msgid "Kemberano" msgid "Kemberano"
msgstr "" msgstr "Kemberano"
#. name for bzq #. name for bzq
msgid "Buli (Indonesia)" msgid "Buli (Indonesia)"
msgstr "" msgstr "Buli (Indonesien)"
#. name for bzr #. name for bzr
msgid "Biri" msgid "Biri"
msgstr "" msgstr "Biri"
#. name for bzs #. name for bzs
msgid "Brazilian Sign Language" msgid "Brazilian Sign Language"
msgstr "" msgstr "Brasilianskt teckenspråk"
#. name for bzt #. name for bzt
msgid "Brithenig" msgid "Brithenig"
@ -4618,39 +4618,39 @@ msgstr ""
#. name for bzu #. name for bzu
msgid "Burmeso" msgid "Burmeso"
msgstr "" msgstr "Burmeso"
#. name for bzv #. name for bzv
msgid "Bebe" msgid "Bebe"
msgstr "" msgstr "Bebe"
#. name for bzw #. name for bzw
msgid "Basa (Nigeria)" msgid "Basa (Nigeria)"
msgstr "" msgstr "Basa (Nigeria)"
#. name for bzx #. name for bzx
msgid "Bozo; Kɛlɛngaxo" msgid "Bozo; Kɛlɛngaxo"
msgstr "" msgstr "Bozo; (Mali)"
#. name for bzy #. name for bzy
msgid "Obanliku" msgid "Obanliku"
msgstr "" msgstr "Obanliku"
#. name for bzz #. name for bzz
msgid "Evant" msgid "Evant"
msgstr "" msgstr "Evant"
#. name for caa #. name for caa
msgid "Chortí" msgid "Chortí"
msgstr "" msgstr "Chortí"
#. name for cab #. name for cab
msgid "Garifuna" msgid "Garifuna"
msgstr "" msgstr "Garifuna"
#. name for cac #. name for cac
msgid "Chuj" msgid "Chuj"
msgstr "" msgstr "Chuj"
#. name for cad #. name for cad
msgid "Caddo" msgid "Caddo"
@ -4658,59 +4658,59 @@ msgstr "Caddo"
#. name for cae #. name for cae
msgid "Lehar" msgid "Lehar"
msgstr "" msgstr "Lehar"
#. name for caf #. name for caf
msgid "Carrier; Southern" msgid "Carrier; Southern"
msgstr "" msgstr "Carrier; södra"
#. name for cag #. name for cag
msgid "Nivaclé" msgid "Nivaclé"
msgstr "" msgstr "Nivaclé"
#. name for cah #. name for cah
msgid "Cahuarano" msgid "Cahuarano"
msgstr "" msgstr "Cahuarano; Peru"
#. name for caj #. name for caj
msgid "Chané" msgid "Chané"
msgstr "" msgstr "Chané"
#. name for cak #. name for cak
msgid "Kaqchikel" msgid "Kaqchikel"
msgstr "" msgstr "Kaqchikel"
#. name for cal #. name for cal
msgid "Carolinian" msgid "Carolinian"
msgstr "" msgstr "Carolinian"
#. name for cam #. name for cam
msgid "Cemuhî" msgid "Cemuhî"
msgstr "" msgstr "Cemuhî"
#. name for can #. name for can
msgid "Chambri" msgid "Chambri"
msgstr "" msgstr "Chambri"
#. name for cao #. name for cao
msgid "Chácobo" msgid "Chácobo"
msgstr "" msgstr "Chácobo"
#. name for cap #. name for cap
msgid "Chipaya" msgid "Chipaya"
msgstr "" msgstr "Chipaya"
#. name for caq #. name for caq
msgid "Nicobarese; Car" msgid "Nicobarese; Car"
msgstr "" msgstr "Nicobarese; Car"
#. name for car #. name for car
msgid "Carib; Galibi" msgid "Carib; Galibi"
msgstr "" msgstr "Carib; Galibi"
#. name for cas #. name for cas
msgid "Tsimané" msgid "Tsimané"
msgstr "" msgstr "Tsimané"
#. name for cat #. name for cat
msgid "Catalan" msgid "Catalan"
@ -4718,15 +4718,15 @@ msgstr "Katalanska"
#. name for cav #. name for cav
msgid "Cavineña" msgid "Cavineña"
msgstr "" msgstr "Cavineña"
#. name for caw #. name for caw
msgid "Callawalla" msgid "Callawalla"
msgstr "" msgstr "Callawalla; Bolivia"
#. name for cax #. name for cax
msgid "Chiquitano" msgid "Chiquitano"
msgstr "" msgstr "Chiquitano; Bolivia"
#. name for cay #. name for cay
msgid "Cayuga" msgid "Cayuga"
@ -4734,115 +4734,115 @@ msgstr ""
#. name for caz #. name for caz
msgid "Canichana" msgid "Canichana"
msgstr "" msgstr "Canichana"
#. name for cbb #. name for cbb
msgid "Cabiyarí" msgid "Cabiyarí"
msgstr "" msgstr "Cabiyarí"
#. name for cbc #. name for cbc
msgid "Carapana" msgid "Carapana"
msgstr "" msgstr "Carapana; Colombia & Brasilien"
#. name for cbd #. name for cbd
msgid "Carijona" msgid "Carijona"
msgstr "" msgstr "Carijona"
#. name for cbe #. name for cbe
msgid "Chipiajes" msgid "Chipiajes"
msgstr "" msgstr "Chipiajes"
#. name for cbg #. name for cbg
msgid "Chimila" msgid "Chimila"
msgstr "" msgstr "Chimila"
#. name for cbh #. name for cbh
msgid "Cagua" msgid "Cagua"
msgstr "" msgstr "Cagua;Venezuela"
#. name for cbi #. name for cbi
msgid "Chachi" msgid "Chachi"
msgstr "" msgstr "Chachi; Ecuador"
#. name for cbj #. name for cbj
msgid "Ede Cabe" msgid "Ede Cabe"
msgstr "" msgstr "Ede Cabe"
#. name for cbk #. name for cbk
msgid "Chavacano" msgid "Chavacano"
msgstr "" msgstr "Chavacano; Filippinerna"
#. name for cbl #. name for cbl
msgid "Chin; Bualkhaw" msgid "Chin; Bualkhaw"
msgstr "" msgstr "Chin; Bualkhaw"
#. name for cbn #. name for cbn
msgid "Nyahkur" msgid "Nyahkur"
msgstr "" msgstr "Nyahkur;Australien"
#. name for cbo #. name for cbo
msgid "Izora" msgid "Izora"
msgstr "" msgstr "Izora"
#. name for cbr #. name for cbr
msgid "Cashibo-Cacataibo" msgid "Cashibo-Cacataibo"
msgstr "" msgstr "Cashibo-Cacataibo;Peru"
#. name for cbs #. name for cbs
msgid "Cashinahua" msgid "Cashinahua"
msgstr "" msgstr "Cashinahua;Peru"
#. name for cbt #. name for cbt
msgid "Chayahuita" msgid "Chayahuita"
msgstr "" msgstr "Chayahuita;Peru"
#. name for cbu #. name for cbu
msgid "Candoshi-Shapra" msgid "Candoshi-Shapra"
msgstr "" msgstr "Candoshi-Shapra;Peru"
#. name for cbv #. name for cbv
msgid "Cacua" msgid "Cacua"
msgstr "" msgstr "Cacua;Colombia"
#. name for cbw #. name for cbw
msgid "Kinabalian" msgid "Kinabalian"
msgstr "" msgstr "Kinabalian;sydöstra Filippinerna"
#. name for cby #. name for cby
msgid "Carabayo" msgid "Carabayo"
msgstr "" msgstr "Carabayo;Colombia"
#. name for cca #. name for cca
msgid "Cauca" msgid "Cauca"
msgstr "" msgstr "Cauca;Colombia & Panama"
#. name for ccc #. name for ccc
msgid "Chamicuro" msgid "Chamicuro"
msgstr "" msgstr "Chamicuro;Peru"
#. name for ccd #. name for ccd
msgid "Creole; Cafundo" msgid "Creole; Cafundo"
msgstr "" msgstr "Creole; Cafundo; Brasilien"
#. name for cce #. name for cce
msgid "Chopi" msgid "Chopi"
msgstr "" msgstr "Chopi;Moçambique"
#. name for ccg #. name for ccg
msgid "Daka; Samba" msgid "Daka; Samba"
msgstr "" msgstr "Daka; Samba, Nigeria"
#. name for cch #. name for cch
msgid "Atsam" msgid "Atsam"
msgstr "" msgstr "Atsam"
#. name for ccj #. name for ccj
msgid "Kasanga" msgid "Kasanga"
msgstr "" msgstr "Kasanga"
#. name for ccl #. name for ccl
msgid "Cutchi-Swahili" msgid "Cutchi-Swahili"
msgstr "" msgstr "Cutchi-Swahili"
#. name for ccm #. name for ccm
msgid "Creole Malay; Malaccan" msgid "Creole Malay; Malaccan"
@ -4850,75 +4850,75 @@ msgstr ""
#. name for cco #. name for cco
msgid "Chinantec; Comaltepec" msgid "Chinantec; Comaltepec"
msgstr "" msgstr "Chinantec; Comaltepec"
#. name for ccp #. name for ccp
msgid "Chakma" msgid "Chakma"
msgstr "" msgstr "Chakma"
#. name for ccq #. name for ccq
msgid "Chaungtha" msgid "Chaungtha"
msgstr "" msgstr "Chaungtha"
#. name for ccr #. name for ccr
msgid "Cacaopera" msgid "Cacaopera"
msgstr "" msgstr "Cacaopera"
#. name for cda #. name for cda
msgid "Choni" msgid "Choni"
msgstr "" msgstr "Choni"
#. name for cde #. name for cde
msgid "Chenchu" msgid "Chenchu"
msgstr "" msgstr "Chenchu"
#. name for cdf #. name for cdf
msgid "Chiru" msgid "Chiru"
msgstr "" msgstr "Chiru"
#. name for cdg #. name for cdg
msgid "Chamari" msgid "Chamari"
msgstr "" msgstr "Chamari"
#. name for cdh #. name for cdh
msgid "Chambeali" msgid "Chambeali"
msgstr "" msgstr "Chambeali"
#. name for cdi #. name for cdi
msgid "Chodri" msgid "Chodri"
msgstr "" msgstr "Chodri"
#. name for cdj #. name for cdj
msgid "Churahi" msgid "Churahi"
msgstr "" msgstr "Churahi"
#. name for cdm #. name for cdm
msgid "Chepang" msgid "Chepang"
msgstr "" msgstr "Chepang"
#. name for cdn #. name for cdn
msgid "Chaudangsi" msgid "Chaudangsi"
msgstr "" msgstr "Chaudangsi"
#. name for cdo #. name for cdo
msgid "Chinese; Min Dong" msgid "Chinese; Min Dong"
msgstr "" msgstr "Kinesiska; Min Dong"
#. name for cdr #. name for cdr
msgid "Cinda-Regi-Tiyal" msgid "Cinda-Regi-Tiyal"
msgstr "" msgstr "Cinda-Regi-Tiyal"
#. name for cds #. name for cds
msgid "Chadian Sign Language" msgid "Chadian Sign Language"
msgstr "" msgstr "Chadian teckenspråk"
#. name for cdy #. name for cdy
msgid "Chadong" msgid "Chadong"
msgstr "" msgstr "Chadong"
#. name for cdz #. name for cdz
msgid "Koda" msgid "Koda"
msgstr "" msgstr "Koda"
#. name for cea #. name for cea
msgid "Chehalis; Lower" msgid "Chehalis; Lower"
@ -4930,11 +4930,11 @@ msgstr "Cebuano"
#. name for ceg #. name for ceg
msgid "Chamacoco" msgid "Chamacoco"
msgstr "" msgstr "Chamacoco"
#. name for cen #. name for cen
msgid "Cen" msgid "Cen"
msgstr "" msgstr "Cen"
#. name for ces #. name for ces
msgid "Czech" msgid "Czech"
@ -4942,7 +4942,7 @@ msgstr "Tjeckiska"
#. name for cet #. name for cet
msgid "Centúúm" msgid "Centúúm"
msgstr "" msgstr "Centúúm"
#. name for cfa #. name for cfa
msgid "Dijim-Bwilim" msgid "Dijim-Bwilim"
@ -4950,31 +4950,31 @@ msgstr ""
#. name for cfd #. name for cfd
msgid "Cara" msgid "Cara"
msgstr "" msgstr "Cara"
#. name for cfg #. name for cfg
msgid "Como Karim" msgid "Como Karim"
msgstr "" msgstr "Como Karim"
#. name for cfm #. name for cfm
msgid "Chin; Falam" msgid "Chin; Falam"
msgstr "" msgstr "Chin; Falam"
#. name for cga #. name for cga
msgid "Changriwa" msgid "Changriwa"
msgstr "" msgstr "Changriwa"
#. name for cgc #. name for cgc
msgid "Kagayanen" msgid "Kagayanen"
msgstr "" msgstr "Kagayanen"
#. name for cgg #. name for cgg
msgid "Chiga" msgid "Chiga"
msgstr "" msgstr "Chiga"
#. name for cgk #. name for cgk
msgid "Chocangacakha" msgid "Chocangacakha"
msgstr "" msgstr "Chocangacakha; Butan"
#. name for cha #. name for cha
msgid "Chamorro" msgid "Chamorro"
@ -4986,11 +4986,11 @@ msgstr "Chibcha"
#. name for chc #. name for chc
msgid "Catawba" msgid "Catawba"
msgstr "" msgstr "Catawba"
#. name for chd #. name for chd
msgid "Chontal; Highland Oaxaca" msgid "Chontal; Highland Oaxaca"
msgstr "" msgstr "Chontal; Highland Oaxaca; Mexico"
#. name for che #. name for che
msgid "Chechen" msgid "Chechen"
@ -4998,7 +4998,7 @@ msgstr "Tjetjenska"
#. name for chf #. name for chf
msgid "Chontal; Tabasco" msgid "Chontal; Tabasco"
msgstr "" msgstr "Chontal; Tabasco"
#. name for chg #. name for chg
msgid "Chagatai" msgid "Chagatai"
@ -5006,7 +5006,7 @@ msgstr "Chagatai"
#. name for chh #. name for chh
msgid "Chinook" msgid "Chinook"
msgstr "" msgstr "Chinook"
#. name for chj #. name for chj
msgid "Chinantec; Ojitlán" msgid "Chinantec; Ojitlán"

View File

@ -4,7 +4,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
__appname__ = u'calibre' __appname__ = u'calibre'
numeric_version = (0, 9, 31) numeric_version = (0, 9, 32)
__version__ = u'.'.join(map(unicode, numeric_version)) __version__ = u'.'.join(map(unicode, numeric_version))
__author__ = u"Kovid Goyal <kovid@kovidgoyal.net>" __author__ = u"Kovid Goyal <kovid@kovidgoyal.net>"

View File

@ -320,7 +320,7 @@ class ITUNES(DriverBase):
self.verbose = self.settings().extra_customization[self.DEBUG_LOGGING] self.verbose = self.settings().extra_customization[self.DEBUG_LOGGING]
if self.verbose: if self.verbose:
logger().info("%s.__init__():" % self.__class__.__name__) logger().info("%s.__init__():" % self.__class__.__name__)
logger().info(" Debug logging enabled in iTunes plugin settings") logger().info(" Debug logging enabled")
@property @property
def cache_dir(self): def cache_dir(self):
@ -1288,7 +1288,7 @@ class ITUNES(DriverBase):
logger().error(" failed to add '%s' to Device|Books" % metadata.title) logger().error(" failed to add '%s' to Device|Books" % metadata.title)
raise UserFeedback("Unable to add '%s' in direct connect mode" % metadata.title, raise UserFeedback("Unable to add '%s' in direct connect mode" % metadata.title,
details=None, level=UserFeedback.ERROR) details=None, level=UserFeedback.ERROR)
self._wait_for_writable_metadata(added) #self._wait_for_writable_metadata(added)
return added return added
elif iswindows: elif iswindows:
@ -1471,6 +1471,7 @@ class ITUNES(DriverBase):
if self.verbose: if self.verbose:
logger().info(" %s._cover_to_thumb()" % self.__class__.__name__) logger().info(" %s._cover_to_thumb()" % self.__class__.__name__)
#logger().info("db_added: %s lb_added: %s" % (db_added, lb_added))
thumb = None thumb = None
if metadata.cover: if metadata.cover:
@ -1514,13 +1515,13 @@ class ITUNES(DriverBase):
''' '''
if lb_added: if lb_added:
delay = 2.0 delay = 2.0
self._wait_for_writable_metadata(db_added, delay=delay)
# Wait for updatable artwork # Wait for updatable artwork
attempts = 9 attempts = 9
while attempts: while attempts:
try: try:
lb_added.artworks[1].data_.set(cover_data) lb_added.artworks[1].data_.set(cover_data)
break
except: except:
attempts -= 1 attempts -= 1
time.sleep(delay) time.sleep(delay)
@ -3229,6 +3230,11 @@ class ITUNES(DriverBase):
if self.verbose: if self.verbose:
logger().info(" %s._wait_for_writable_metadata()" % self.__class__.__name__) logger().info(" %s._wait_for_writable_metadata()" % self.__class__.__name__)
if not db_added:
if self.verbose:
logger().info("called from %s() with null db_added" % sys._getframe(1).f_code.co_name)
return
attempts = 9 attempts = 9
while attempts: while attempts:
try: try:

View File

@ -279,11 +279,11 @@ class POCKETBOOK602(USBMS):
class POCKETBOOK622(POCKETBOOK602): class POCKETBOOK622(POCKETBOOK602):
name = 'PocketBook 622 Device Interface' name = 'PocketBook 622 Device Interface'
description = _('Communicate with the PocketBook 622 reader.') description = _('Communicate with the PocketBook 622 and 623 readers.')
EBOOK_DIR_MAIN = '' EBOOK_DIR_MAIN = ''
VENDOR_ID = [0x0489] VENDOR_ID = [0x0489]
PRODUCT_ID = [0xe107] PRODUCT_ID = [0xe107, 0xcff1]
BCD = [0x0326] BCD = [0x0326]
VENDOR_NAME = 'LINUX' VENDOR_NAME = 'LINUX'

View File

@ -224,16 +224,19 @@ class libiMobileDevice():
def copy_to_iDevice(self, src, dst): def copy_to_iDevice(self, src, dst):
''' '''
High-level convenience method to copy src on local filesystem to High-level convenience method to copy src from local filesystem to
dst on iDevice. dst on iDevice.
Assumed to be a binary file (epub, sqlite, etc)
src: file on local filesystem src: file on local filesystem
dst: file to be created on iOS filesystem dst: file to be created on iOS filesystem
''' '''
self._log_location("src='%s', dst='%s'" % (src, dst)) self._log_location("src=%s, dst=%s" % (repr(src), repr(dst)))
with open(src) as f: mode = 'rb'
with open(src, mode) as f:
content = bytearray(f.read()) content = bytearray(f.read())
mode = 'wb' mode = 'wb'
handle = self._afc_file_open(dst, mode=mode) handle = self._afc_file_open(str(dst), mode=mode)
if handle is not None: if handle is not None:
success = self._afc_file_write(handle, content, mode=mode) success = self._afc_file_write(handle, content, mode=mode)
if self.verbose: if self.verbose:
@ -533,7 +536,7 @@ class libiMobileDevice():
else: else:
if self.verbose: if self.verbose:
self.log(" could not open file") self.log(" could not open file")
raise libiMobileDeviceIOException("could not open file '%s' for reading" % path) raise libiMobileDeviceIOException("could not open file %s for reading" % repr(path))
return data return data
@ -800,7 +803,7 @@ class libiMobileDevice():
error: (afc_error_t) AFC_E_SUCCESS (0) on success or AFC_E_* error value error: (afc_error_t) AFC_E_SUCCESS (0) on success or AFC_E_* error value
''' '''
self._log_location("'%s', mode='%s'" % (filename, mode)) self._log_location("%s, mode='%s'" % (repr(filename), mode))
handle = c_ulonglong(0) handle = c_ulonglong(0)
@ -1682,6 +1685,18 @@ class libiMobileDevice():
raise libiMobileDeviceException(error_description) raise libiMobileDeviceException(error_description)
# ~~~ logging ~~~ # ~~~ logging ~~~
def _log_diagnostic(self, msg=None):
'''
Print msg to console
'''
if not self.verbose:
return
if msg:
debug_print(" %s" % msg)
else:
debug_print()
def _log_location(self, *args): def _log_location(self, *args):
''' '''
''' '''

View File

@ -74,7 +74,7 @@ def read_border(parent, dest):
for border in XPath('./w:pBdr')(parent): for border in XPath('./w:pBdr')(parent):
for edge in ('left', 'top', 'right', 'bottom'): for edge in ('left', 'top', 'right', 'bottom'):
for elem in XPath('./w:%s' % edge): for elem in XPath('./w:%s' % edge)(border):
color = get(elem, 'w:color') color = get(elem, 'w:color')
if color is not None: if color is not None:
vals['border_%s_color' % edge] = simple_color(color) vals['border_%s_color' % edge] = simple_color(color)
@ -151,8 +151,8 @@ def read_spacing(parent, dest):
l, lr = get(s, 'w:line'), get(s, 'w:lineRule', 'auto') l, lr = get(s, 'w:line'), get(s, 'w:lineRule', 'auto')
if l is not None: if l is not None:
lh = simple_float(l, 0.05) if lr in {'exactly', 'atLeast'} else simple_float(l, 1/240.0) lh = simple_float(l, 0.05) if lr in {'exact', 'atLeast'} else simple_float(l, 1/240.0)
line_height = '%.3g%s' % (lh, 'pt' if lr in {'exactly', 'atLeast'} else '') line_height = '%.3g%s' % (lh, 'pt' if lr in {'exact', 'atLeast'} else '')
setattr(dest, 'margin_top', padding_top) setattr(dest, 'margin_top', padding_top)
setattr(dest, 'margin_bottom', padding_bottom) setattr(dest, 'margin_bottom', padding_bottom)
@ -189,6 +189,89 @@ def read_numbering(parent, dest):
val = (num_id, lvl) if num_id is not None or lvl is not None else inherit val = (num_id, lvl) if num_id is not None or lvl is not None else inherit
setattr(dest, 'numbering', val) setattr(dest, 'numbering', val)
class Frame(object):

    '''
    Frame properties of a paragraph, read from the attributes of a frame
    properties element. Lengths are divided by 20 when read and are emitted
    as pt values by css().
    '''

    all_attributes = ('drop_cap', 'h', 'w', 'h_anchor', 'h_rule', 'v_anchor', 'wrap',
                      'h_space', 'v_space', 'lines', 'x_align', 'y_align', 'x', 'y')

    def __init__(self, fp):
        # Plain string attributes, each with its own default
        self.drop_cap = get(fp, 'w:dropCap', 'none')
        self.h_anchor = get(fp, 'w:hAnchor', 'page')
        self.h_rule = get(fp, 'w:hRule', 'auto')
        self.v_anchor = get(fp, 'w:vAnchor', 'page')
        self.wrap = get(fp, 'w:wrap', 'around')
        self.x_align = get(fp, 'w:xAlign')
        self.y_align = get(fp, 'w:yAlign')

        # Numeric lengths: (attribute on self, XML attribute, fallback used
        # when the XML attribute is missing or not an integer)
        lengths = (
            ('h', 'w:h', 0),
            ('w', 'w:w', None),
            ('x', 'w:x', 0),
            ('y', 'w:y', 0),
            ('h_space', 'w:hSpace', 0),
            ('v_space', 'w:vSpace', 0),
        )
        for attr, xml_name, fallback in lengths:
            try:
                value = int(get(fp, xml_name))/20
            except (ValueError, TypeError):
                value = fallback
            setattr(self, attr, value)

        # Line count is used as is, no scaling
        try:
            self.lines = int(get(fp, 'w:lines'))
        except (ValueError, TypeError):
            self.lines = 1

    def css(self, page):
        '''
        Return a dict of CSS properties for this frame. page must provide a
        width attribute, used to decide which side a positioned frame
        floats to.
        '''
        ans = {'overflow': 'hidden'}

        if self.drop_cap in {'drop', 'margin'}:
            # Drop caps are always floated left with a little room after them
            ans['float'] = 'left'
            ans['margin'] = '0'
            ans['padding-right'] = '0.2em'
            return ans

        if self.h_rule != 'auto':
            height_key = 'min-height' if self.h_rule == 'atLeast' else 'height'
            ans[height_key] = '%.3gpt' % self.h
        if self.w is not None:
            ans['width'] = '%.3gpt' % self.w
        ans['padding-top'] = ans['padding-bottom'] = '%.3gpt' % self.v_space
        if self.wrap not in {None, 'none'}:
            ans['padding-left'] = ans['padding-right'] = '%.3gpt' % self.h_space
            if self.x_align is not None:
                side = 'right' if self.x_align == 'right' else 'left'
            else:
                # No explicit alignment: pick the side from the x position
                side = 'left' if self.x/page.width < 0.5 else 'right'
            ans['float'] = side
        return ans

    def __eq__(self, other):
        # Equal when no attribute differs; attributes missing on other
        # compare as inherit
        return not any(
            getattr(other, name, inherit) != getattr(self, name)
            for name in self.all_attributes)

    def __ne__(self, other):
        return not self.__eq__(other)
def read_frame(parent, dest):
    '''
    Set dest.frame to a Frame built from the last w:framePr child of parent,
    or to inherit when parent has no such child.
    '''
    frames = [Frame(fp) for fp in XPath('./w:framePr')(parent)]
    dest.frame = frames[-1] if frames else inherit
# }}} # }}}
class ParagraphStyle(object): class ParagraphStyle(object):
@ -208,7 +291,7 @@ class ParagraphStyle(object):
# Misc. # Misc.
'text_indent', 'text_align', 'line_height', 'direction', 'background_color', 'text_indent', 'text_align', 'line_height', 'direction', 'background_color',
'numbering', 'font_family', 'font_size', 'numbering', 'font_family', 'font_size', 'frame',
) )
def __init__(self, pPr=None): def __init__(self, pPr=None):
@ -225,7 +308,7 @@ class ParagraphStyle(object):
): ):
setattr(self, p, binary_property(pPr, p)) setattr(self, p, binary_property(pPr, p))
for x in ('border', 'indent', 'justification', 'spacing', 'direction', 'shd', 'numbering'): for x in ('border', 'indent', 'justification', 'spacing', 'direction', 'shd', 'numbering', 'frame'):
f = globals()['read_%s' % x] f = globals()['read_%s' % x]
f(pPr, self) f(pPr, self)
@ -286,5 +369,3 @@ class ParagraphStyle(object):
return self._css return self._css
# TODO: keepNext must be done at markup level # TODO: keepNext must be done at markup level

View File

@ -11,7 +11,7 @@ import os, sys, shutil
from lxml import etree from lxml import etree
from calibre import walk, guess_type from calibre import walk, guess_type
from calibre.ebooks.metadata import string_to_authors from calibre.ebooks.metadata import string_to_authors, authors_to_sort_string
from calibre.ebooks.metadata.book.base import Metadata from calibre.ebooks.metadata.book.base import Metadata
from calibre.ebooks.docx import InvalidDOCX from calibre.ebooks.docx import InvalidDOCX
from calibre.ebooks.docx.names import DOCUMENT, DOCPROPS, XPath, APPPROPS from calibre.ebooks.docx.names import DOCUMENT, DOCPROPS, XPath, APPPROPS
@ -49,6 +49,7 @@ def read_doc_props(raw, mi):
aut.extend(string_to_authors(author.text)) aut.extend(string_to_authors(author.text))
if aut: if aut:
mi.authors = aut mi.authors = aut
mi.author_sort = authors_to_sort_string(aut)
desc = XPath('//dc:description')(root) desc = XPath('//dc:description')(root)
if desc: if desc:
@ -181,7 +182,9 @@ class DOCX(object):
else: else:
root = fromstring(raw) root = fromstring(raw)
for item in root.xpath('//*[local-name()="Relationships"]/*[local-name()="Relationship" and @Type and @Target]'): for item in root.xpath('//*[local-name()="Relationships"]/*[local-name()="Relationship" and @Type and @Target]'):
target = '/'.join((base, item.get('Target').lstrip('/'))) target = item.get('Target')
if item.get('TargetMode', None) != 'External':
target = '/'.join((base, target.lstrip('/')))
typ = item.get('Type') typ = item.get('Type')
Id = item.get('Id') Id = item.get('Id')
by_id[Id] = by_type[typ] = target by_id[Id] = by_type[typ] = target

View File

@ -0,0 +1,62 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
from collections import OrderedDict
from calibre.ebooks.docx.names import get, XPath, descendants
class Note(object):

    '''
    Wrapper around a single note element. Records the element and the value
    of its w:type attribute ('normal' when the attribute is absent).
    Iterating over a Note yields the w:p descendants of the element.
    '''

    def __init__(self, parent):
        self.type = get(parent, 'w:type', 'normal')
        self.parent = parent

    def __iter__(self):
        for paragraph in descendants(self.parent, 'w:p'):
            yield paragraph
class Footnotes(object):

    '''
    Registry of the footnotes and endnotes of a document.

    Calling the object with the footnotes and endnotes root elements (either
    may be None) indexes the notes by their w:id. get_ref() then hands out
    numbered anchors for the notes that are actually referenced, and
    iterating over the object yields (anchor, number as text, note) in the
    order the references were resolved.
    '''

    def __init__(self):
        self.footnotes = {}
        self.endnotes = {}
        self.counter = 0
        self.notes = OrderedDict()

    def __call__(self, footnotes, endnotes):
        sources = (
            (footnotes, './w:footnote[@w:id]', self.footnotes),
            (endnotes, './w:endnote[@w:id]', self.endnotes),
        )
        for root, expression, registry in sources:
            if root is None:
                continue
            for elem in XPath(expression)(root):
                note_id = get(elem, 'w:id')
                if note_id:
                    registry[note_id] = Note(elem)

    def get_ref(self, ref):
        '''
        Return (anchor, number as text) for the note that ref points to, or
        (None, None) if there is no such note or it is not of type 'normal'.
        '''
        note_id = get(ref, 'w:id')
        if ref.tag.endswith('}footnoteReference'):
            registry = self.footnotes
        else:
            registry = self.endnotes
        note = registry.get(note_id, None)
        if note is None or note.type != 'normal':
            return None, None
        self.counter += 1
        label = type('')(self.counter)
        anchor = 'note_%d' % self.counter
        self.notes[anchor] = (label, note)
        return anchor, label

    def __iter__(self):
        for anchor, entry in self.notes.iteritems():
            label, note = entry
            yield anchor, label, note

    @property
    def has_notes(self):
        return bool(self.notes)

View File

@ -0,0 +1,205 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
import os
from lxml.html.builder import IMG
from calibre.ebooks.docx.names import XPath, get, barename
from calibre.utils.filenames import ascii_filename
from calibre.utils.imghdr import what
def emu_to_pt(x):
    ''' Convert a numeric length in EMU to points by dividing it by 12700. '''
    emu_per_pt = 12700
    return x / emu_per_pt
def get_image_properties(parent):
    '''
    Read the size, visibility and description of an image from the
    wp:extent and wp:docPr children of parent.

    Returns (style, alt): style is a dict of CSS properties (width/height in
    pt, and display: none for hidden images), alt is the last non-empty
    descr attribute found, or None.
    '''
    size = {'cx': None, 'cy': None}
    for extent in XPath('./wp:extent')(parent):
        for key in ('cx', 'cy'):
            try:
                size[key] = emu_to_pt(int(extent.get(key)))
            except (TypeError, ValueError):
                # Missing or malformed value, keep whatever we had before
                pass

    ans = {}
    if size['cx'] is not None:
        ans['width'] = '%.3gpt' % size['cx']
    if size['cy'] is not None:
        ans['height'] = '%.3gpt' % size['cy']

    alt = None
    for docPr in XPath('./wp:docPr')(parent):
        description = docPr.get('descr', None)
        if description:
            alt = description
        if docPr.get('hidden', None) in {'true', 'on', '1'}:
            ans['display'] = 'none'

    return ans, alt
def get_image_margins(elem):
    '''
    Read the distL/distT/distR/distB attributes of elem and return them as
    a dict of CSS padding-left/top/right/bottom values in pt.

    Attributes that are missing or not integers are skipped.
    '''
    ans = {}
    for w, css in (('L', 'left'), ('T', 'top'), ('R', 'right'), ('B', 'bottom')):
        val = elem.get('dist%s' % w, None)
        if val is None:
            continue
        try:
            # Attribute values are strings and must be converted before the
            # division in emu_to_pt, otherwise every margin is silently lost
            val = emu_to_pt(int(val))
        except (TypeError, ValueError):
            continue
        ans['padding-%s' % css] = '%.3gpt' % val
    return ans
def get_hpos(anchor, page_width):
    '''
    Return the horizontal position of an anchored image as a number, where
    0 is the left edge and 1 the right edge of a page page_width pts wide.

    The position comes from the first usable piece of information found in
    the wp:positionH children of anchor (relativeFrom, wp:align or
    wp:posOffset), then from wp:simplePos. 0 is returned when nothing
    usable is present.
    '''
    for ph in XPath('./wp:positionH')(anchor):
        rp = ph.get('relativeFrom', None)
        if rp == 'leftMargin':
            return 0
        if rp == 'rightMargin':
            return 1
        for align in XPath('./wp:align')(ph):
            al = align.text
            if al == 'left':
                return 0
            if al == 'center':
                return 0.5
            if al == 'right':
                return 1
        for po in XPath('./wp:posOffset')(ph):
            try:
                pos = emu_to_pt(int(po.text))
            except (TypeError, ValueError):
                continue
            return pos/page_width

    for sp in XPath('./wp:simplePos')(anchor):
        try:
            # The attribute is a string (or None), convert it before the
            # division in emu_to_pt so that this fallback can actually work
            x = emu_to_pt(int(sp.get('x', None)))
        except (TypeError, ValueError):
            continue
        return x/page_width

    return 0
class Images(object):

    '''
    Converts drawing elements into <img> tags, writing the image data they
    reference into an ``images`` sub-directory of the destination directory.

    The object must first be called with the map of relationship id to
    target name. to_html() sets up the output directory and the container
    used to read image data.
    '''

    def __init__(self):
        self.rid_map = {}
        self.used = {}  # relationship id -> file name already written
        self.names = set()
        self.all_images = set()

    def __call__(self, relationships_by_id):
        self.rid_map = relationships_by_id

    def generate_filename(self, rid, base=None):
        '''
        Write the image with relationship id rid to the images directory
        and return the file name used. The same name is returned, without
        writing again, on subsequent calls for the same rid.

        base: preferred file name, defaults to the name of the target.
        '''
        if rid in self.used:
            return self.used[rid]
        raw = self.docx.read(self.rid_map[rid])
        base = base or ascii_filename(self.rid_map[rid].rpartition('/')[-1]).replace(' ', '_')
        stem, dot, suffix = base.rpartition('.')
        if not dot:
            # A name without any extension is all stem. rpartition() puts it
            # in the last slot, which used to produce names like ".png"
            stem, suffix = base, ''
        ext = what(None, raw) or suffix or 'jpeg'
        base = (stem or 'image') + '.' + ext
        exists = frozenset(self.used.itervalues())
        c = 1
        while base in exists:
            n, e = base.rpartition('.')[0::2]
            base = '%s-%d.%s' % (n, c, e)
            c += 1
        self.used[rid] = base
        with open(os.path.join(self.dest_dir, base), 'wb') as f:
            f.write(raw)
        self.all_images.add('images/' + base)
        return base

    def pic_to_img(self, pic, alt=None):
        '''
        Return an <img> element for the first embedded blip of pic that has
        a known relationship id, or None if there is none.

        alt: alternate text to use when pic itself carries no description.
        '''
        name = None
        for pr in XPath('descendant::pic:cNvPr')(pic):
            name = pr.get('name', None)
            if name:
                name = ascii_filename(name).replace(' ', '_')
            # Only override the alt text passed in by the caller when the
            # picture really has a description of its own
            alt = pr.get('descr', None) or alt
        for a in XPath('descendant::a:blip[@r:embed]')(pic):
            rid = get(a, 'r:embed')
            if rid in self.rid_map:
                src = self.generate_filename(rid, name)
                img = IMG(src='images/%s' % src)
                if alt:
                    # Elements are not callable, attributes are set with set()
                    img.set('alt', alt)
                return img
        return None

    def _pics_to_html(self, container, style, alt):
        # Yield a styled <img> for every picture inside container
        for pic in XPath('descendant::pic:pic')(container):
            ans = self.pic_to_img(pic, alt)
            if ans is not None:
                if style:
                    ans.set('style', '; '.join('%s: %s' % (k, v) for k, v in style.iteritems()))
                yield ans

    def drawing_to_html(self, drawing, page):
        ''' Yield the <img> tags for all pictures in drawing, inline pictures first. '''
        # First process the inline pictures
        for inline in XPath('./wp:inline')(drawing):
            style, alt = get_image_properties(inline)
            for img in self._pics_to_html(inline, style, alt):
                yield img

        # Now process the floats
        for anchor in XPath('./wp:anchor')(drawing):
            style, alt = get_image_properties(anchor)
            self.get_float_properties(anchor, style, page)
            for img in self._pics_to_html(anchor, style, alt):
                yield img

    def get_float_properties(self, anchor, style, page):
        '''
        Add to style (in place) the CSS needed to position an anchored
        image: display, padding and either a float or automatic margins,
        depending on the wrapping mode and horizontal position.
        '''
        if 'display' not in style:
            style['display'] = 'block'
        padding = get_image_margins(anchor)
        width = float(style.get('width', '100pt')[:-2])

        page_width = page.width - page.margin_left - page.margin_right

        # Position of the center of the image, as a fraction of the text width
        hpos = get_hpos(anchor, page_width) + width/(2*page_width)

        wrap_elem = None
        dofloat = False

        for child in reversed(anchor):
            bt = barename(child.tag)
            if bt in {'wrapNone', 'wrapSquare', 'wrapThrough', 'wrapTight', 'wrapTopAndBottom'}:
                wrap_elem = child
                dofloat = bt not in {'wrapNone', 'wrapTopAndBottom'}
                break

        if wrap_elem is not None:
            padding.update(get_image_margins(wrap_elem))
            wt = wrap_elem.get('wrapText', None)
            # Text on the right means the image is on the left and vice versa
            hpos = 0 if wt == 'right' else 1 if wt == 'left' else hpos
            if dofloat:
                style['float'] = 'left' if hpos < 0.65 else 'right'
            else:
                ml, mr = (None, None) if hpos < 0.34 else ('auto', None) if hpos > 0.65 else ('auto', 'auto')
                if ml is not None:
                    style['margin-left'] = ml
                if mr is not None:
                    style['margin-right'] = mr

        style.update(padding)

    def to_html(self, elem, page, docx, dest_dir):
        '''
        Yield the <img> tags for elem, creating the images directory inside
        dest_dir if needed. Only drawing elements are currently handled.
        '''
        dest = os.path.join(dest_dir, 'images')
        if not os.path.exists(dest):
            os.mkdir(dest)
        self.dest_dir, self.docx = dest, docx
        if elem.tag.endswith('}drawing'):
            for tag in self.drawing_to_html(elem, page):
                yield tag
        # TODO: Handle w:pict

View File

@ -6,14 +6,23 @@ from __future__ import (unicode_literals, division, absolute_import,
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
import re
from future_builtins import map
from lxml.etree import XPath as X from lxml.etree import XPath as X
from calibre.utils.filenames import ascii_text
DOCUMENT = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument' DOCUMENT = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument'
DOCPROPS = 'http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties' DOCPROPS = 'http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties'
APPPROPS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties' APPPROPS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties'
STYLES = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles' STYLES = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles'
NUMBERING = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/numbering' NUMBERING = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/numbering'
FONTS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/fontTable' FONTS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/fontTable'
IMAGES = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/image'
LINKS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink'
FOOTNOTES = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/footnotes'
ENDNOTES = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/endnotes'
namespaces = { namespaces = {
'mo': 'http://schemas.microsoft.com/office/mac/office/2008/main', 'mo': 'http://schemas.microsoft.com/office/mac/office/2008/main',
@ -65,7 +74,32 @@ def barename(x):
def XML(x): def XML(x):
return '{%s}%s' % (namespaces['xml'], x) return '{%s}%s' % (namespaces['xml'], x)
def get(x, attr, default=None): def expand(name):
ns, name = attr.partition(':')[0::2] ns, tag = name.partition(':')[0::2]
return x.attrib.get('{%s}%s' % (namespaces[ns], name), default) if ns:
tag = '{%s}%s' % (namespaces[ns], tag)
return tag
def get(x, attr, default=None):
    ''' Return the value of the attribute with prefixed name attr on x, or default. '''
    qualified = expand(attr)
    return x.attrib.get(qualified, default)
def ancestor(elem, name):
    '''
    Return the nearest ancestor of elem whose tag matches the prefixed name,
    or None when no ancestor matches.
    '''
    wanted = expand(name)
    node = elem
    while node is not None:
        node = node.getparent()
        if getattr(node, 'tag', None) == wanted:
            return node
    return None
def generate_anchor(name, existing):
    '''
    Build an anchor of the form id_<name>, keeping only ASCII letters,
    digits and underscores from name, that is not present in existing. A
    numeric suffix is appended when needed to avoid a clash.
    '''
    cleaned = re.sub(r'[^0-9a-zA-Z_]', '', ascii_text(name))
    stem = 'id_' + cleaned.lstrip('_')
    candidate = stem
    suffix = 1
    while candidate in existing:
        candidate = '%s_%d' % (stem, suffix)
        suffix += 1
    return candidate
def children(elem, *args):
    ''' Iterate over the children of elem whose tags match the prefixed names in args. '''
    tags = [expand(name) for name in args]
    return elem.iterchildren(*tags)
def descendants(elem, *args):
    ''' Iterate over the descendants of elem whose tags match the prefixed names in args. '''
    tags = [expand(name) for name in args]
    return elem.iterdescendants(*tags)

View File

@ -13,6 +13,38 @@ from calibre.ebooks.docx.block_styles import ParagraphStyle, inherit
from calibre.ebooks.docx.char_styles import RunStyle from calibre.ebooks.docx.char_styles import RunStyle
from calibre.ebooks.docx.names import XPath, get from calibre.ebooks.docx.names import XPath, get
class PageProperties(object):

    '''
    Class representing page level properties (page size/margins) read from
    sectPr elements.

    All values are in pts. When several sectPr elements specify the same
    property, the last valid value wins. Missing or malformed values leave
    the defaults (A4 page, 72pt left and right margins) in place.
    '''

    def __init__(self, elems=()):
        # These must be unpacked. A chained assignment would store the whole
        # (width, height) tuple in both attributes and break all later
        # arithmetic on the page width
        self.width, self.height = 595.28, 841.89  # pts, A4
        self.margin_left = self.margin_right = 72  # pts

        for sectPr in elems:
            for pgSz in XPath('./w:pgSz')(sectPr):
                w, h = get(pgSz, 'w:w'), get(pgSz, 'w:h')
                try:
                    self.width = int(w)/20
                except (ValueError, TypeError):
                    pass
                try:
                    self.height = int(h)/20
                except (ValueError, TypeError):
                    pass
            for pgMar in XPath('./w:pgMar')(sectPr):
                l, r = get(pgMar, 'w:left'), get(pgMar, 'w:right')
                try:
                    self.margin_left = int(l)/20
                except (ValueError, TypeError):
                    pass
                try:
                    self.margin_right = int(r)/20
                except (ValueError, TypeError):
                    pass
class Style(object): class Style(object):
''' '''
@ -352,6 +384,19 @@ class Styles(object):
p { text-indent: 1.5em } p { text-indent: 1.5em }
ul, ol, p { margin: 0; padding: 0 } ul, ol, p { margin: 0; padding: 0 }
sup.noteref a { text-decoration: none }
h1.notes-header { page-break-before: always }
dl.notes dt { font-size: large }
dl.notes dt a { text-decoration: none }
dl.notes dd { page-break-after: always }
dl.notes dd:last-of-type { page-break-after: avoid }
''') % (self.body_font_family, self.body_font_size) ''') % (self.body_font_family, self.body_font_size)
if ef: if ef:
prefix = ef + '\n' + prefix prefix = ef + '\n' + prefix

View File

@ -7,17 +7,24 @@ __license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
import sys, os, re import sys, os, re
from collections import OrderedDict from collections import OrderedDict, defaultdict
from lxml import html from lxml import html
from lxml.html.builder import ( from lxml.html.builder import (
HTML, HEAD, TITLE, BODY, LINK, META, P, SPAN, BR) HTML, HEAD, TITLE, BODY, LINK, META, P, SPAN, BR, DIV, SUP, A, DT, DL, DD, H1)
from calibre.ebooks.docx.container import DOCX, fromstring from calibre.ebooks.docx.container import DOCX, fromstring
from calibre.ebooks.docx.names import XPath, is_tag, XML, STYLES, NUMBERING, FONTS from calibre.ebooks.docx.names import (
from calibre.ebooks.docx.styles import Styles, inherit XPath, is_tag, XML, STYLES, NUMBERING, FONTS, get, generate_anchor,
descendants, ancestor, FOOTNOTES, ENDNOTES)
from calibre.ebooks.docx.styles import Styles, inherit, PageProperties
from calibre.ebooks.docx.numbering import Numbering from calibre.ebooks.docx.numbering import Numbering
from calibre.ebooks.docx.fonts import Fonts from calibre.ebooks.docx.fonts import Fonts
from calibre.ebooks.docx.images import Images
from calibre.ebooks.docx.footnotes import Footnotes
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.oeb.polish.toc import elem_to_toc_text
from calibre.utils.localization import canonicalize_lang, lang_as_iso639_1 from calibre.utils.localization import canonicalize_lang, lang_as_iso639_1
class Text: class Text:
@ -31,13 +38,15 @@ class Text:
class Convert(object): class Convert(object):
def __init__(self, path_or_stream, dest_dir=None, log=None): def __init__(self, path_or_stream, dest_dir=None, log=None, notes_text=None):
self.docx = DOCX(path_or_stream, log=log) self.docx = DOCX(path_or_stream, log=log)
self.log = self.docx.log self.log = self.docx.log
self.notes_text = notes_text or _('Notes')
self.dest_dir = dest_dir or os.getcwdu() self.dest_dir = dest_dir or os.getcwdu()
self.mi = self.docx.metadata self.mi = self.docx.metadata
self.body = BODY() self.body = BODY()
self.styles = Styles() self.styles = Styles()
self.images = Images()
self.object_map = OrderedDict() self.object_map = OrderedDict()
self.html = HTML( self.html = HTML(
HEAD( HEAD(
@ -64,12 +73,37 @@ class Convert(object):
doc = self.docx.document doc = self.docx.document
relationships_by_id, relationships_by_type = self.docx.document_relationships relationships_by_id, relationships_by_type = self.docx.document_relationships
self.read_styles(relationships_by_type) self.read_styles(relationships_by_type)
self.images(relationships_by_id)
self.layers = OrderedDict() self.layers = OrderedDict()
for wp in XPath('//w:p')(doc): self.framed = [[]]
self.framed_map = {}
self.anchor_map = {}
self.link_map = defaultdict(list)
self.read_page_properties(doc)
for wp, page_properties in self.page_map.iteritems():
self.current_page = page_properties
p = self.convert_p(wp) p = self.convert_p(wp)
self.body.append(p) self.body.append(p)
notes_header = None
if self.footnotes.has_notes:
dl = DL()
dl.set('class', 'notes')
self.body.append(H1(self.notes_text))
notes_header = self.body[-1]
notes_header.set('class', 'notes-header')
self.body.append(dl)
for anchor, text, note in self.footnotes:
dl.append(DT('[', A('' + text, href='#back_%s' % anchor, title=text), id=anchor))
dl[-1][0].tail = ']'
dl.append(DD())
for wp in note:
p = self.convert_p(wp)
dl[-1].append(p)
self.resolve_links(relationships_by_id)
# TODO: tables <w:tbl> child of <w:body> (nested tables?) # TODO: tables <w:tbl> child of <w:body> (nested tables?)
# TODO: Last section properties <w:sectPr> child of <w:body>
self.styles.cascade(self.layers) self.styles.cascade(self.layers)
@ -84,6 +118,7 @@ class Convert(object):
lvl = 0 lvl = 0
numbered.append((html_obj, num_id, lvl)) numbered.append((html_obj, num_id, lvl))
self.numbering.apply_markup(numbered, self.body, self.styles, self.object_map) self.numbering.apply_markup(numbered, self.body, self.styles, self.object_map)
self.apply_frames()
if len(self.body) > 0: if len(self.body) > 0:
self.body.text = '\n\t' self.body.text = '\n\t'
@ -100,7 +135,39 @@ class Convert(object):
cls = self.styles.class_name(css) cls = self.styles.class_name(css)
if cls: if cls:
html_obj.set('class', cls) html_obj.set('class', cls)
self.write() for html_obj, css in self.framed_map.iteritems():
cls = self.styles.class_name(css)
if cls:
html_obj.set('class', cls)
if notes_header is not None:
for h in self.body.iterchildren('h1', 'h2', 'h3'):
notes_header.tag = h.tag
cls = h.get('class', None)
if cls and cls != 'notes-header':
notes_header.set('class', '%s notes-header' % cls)
break
return self.write()
def read_page_properties(self, doc):
    ''' Build self.page_map, an ordered mapping from every w:p element in
    doc to the PageProperties object that applies to it.

    A paragraph that contains a w:sectPr closes a group: that paragraph and
    all paragraphs collected since the previous such paragraph share one
    PageProperties built from the w:sectPr element(s) found in it. Any
    paragraphs left over at the end use the w:sectPr that is a direct child
    of w:body. '''
    self.page_map = OrderedDict()
    pending = []
    for para in descendants(doc, 'w:p'):
        section = tuple(descendants(para, 'w:sectPr'))
        if not section:
            # No section break here, decide on the properties later
            pending.append(para)
            continue
        props = PageProperties(section)
        pending.append(para)
        for member in pending:
            self.page_map[member] = props
        pending = []

    if pending:
        # Trailing paragraphs belong to the last section of the body
        props = PageProperties(XPath('./w:body/w:sectPr')(doc))
        for member in pending:
            self.page_map[member] = props
def read_styles(self, relationships_by_type): def read_styles(self, relationships_by_type):
@ -109,16 +176,32 @@ class Convert(object):
if name is None: if name is None:
cname = self.docx.document_name.split('/') cname = self.docx.document_name.split('/')
cname[-1] = defname cname[-1] = defname
if self.docx.exists(cname): if self.docx.exists('/'.join(cname)):
name = name name = name
return name return name
nname = get_name(NUMBERING, 'numbering.xml') nname = get_name(NUMBERING, 'numbering.xml')
sname = get_name(STYLES, 'styles.xml') sname = get_name(STYLES, 'styles.xml')
fname = get_name(FONTS, 'fontTable.xml') fname = get_name(FONTS, 'fontTable.xml')
foname = get_name(FOOTNOTES, 'footnotes.xml')
enname = get_name(ENDNOTES, 'endnotes.xml')
numbering = self.numbering = Numbering() numbering = self.numbering = Numbering()
footnotes = self.footnotes = Footnotes()
fonts = self.fonts = Fonts() fonts = self.fonts = Fonts()
foraw = enraw = None
if foname is not None:
try:
foraw = self.docx.read(foname)
except KeyError:
self.log.warn('Footnotes %s do not exist' % foname)
if enname is not None:
try:
enraw = self.docx.read(enname)
except KeyError:
self.log.warn('Endnotes %s do not exist' % enname)
footnotes(fromstring(foraw) if foraw else None, fromstring(enraw) if enraw else None)
if fname is not None: if fname is not None:
embed_relationships = self.docx.get_relationships(fname)[0] embed_relationships = self.docx.get_relationships(fname)[0]
try: try:
@ -146,7 +229,48 @@ class Convert(object):
self.styles.resolve_numbering(numbering) self.styles.resolve_numbering(numbering)
def create_toc(self):
    ''' Build a TOC tree from the h1, h2 and h3 elements of the document
    body, adding a generated id to any heading that lacks one.

    Returns the root TOC object, or None when flattening the tree yields
    no more than one node. '''
    body = self.body
    heading_tags = ('h1', 'h2', 'h3')
    toc_root = TOC()
    depth = len(heading_tags)

    # Map every heading element to its 1-based level
    elem_level = {}
    for level, tag in enumerate(heading_tags, 1):
        for elem in frozenset(XPath('//%s' % tag)(body)):
            elem_level[elem] = level

    # last_at[n] is the most recently added TOC node at level n, or None.
    # Level 0 is always the root, so the upward search below terminates.
    last_at = dict.fromkeys(xrange(1, depth + 1))
    last_at[0] = toc_root

    self.idcount = 0

    for heading in body.iterdescendants(*heading_tags):
        level = elem_level.get(heading, None)
        if level is None:
            continue
        # Attach to the nearest shallower level that currently has a node
        parent_level = level - 1
        while last_at[parent_level] is None:
            parent_level -= 1
        parent = last_at[parent_level]
        level = parent_level + 1

        anchor = heading.get('id', None)
        if not anchor:
            self.idcount += 1
            anchor = 'toc_id_%d' % self.idcount
            heading.set('id', anchor)

        label = elem_to_toc_text(heading)
        last_at[level] = parent.add_item('index.html', anchor, label)
        # Nodes deeper than the new one can no longer act as parents
        for deeper in xrange(level + 1, depth + 1):
            last_at[deeper] = None

    if len(tuple(toc_root.flat())) > 1:
        return toc_root
def write(self): def write(self):
toc = self.create_toc()
raw = html.tostring(self.html, encoding='utf-8', doctype='<!DOCTYPE html>') raw = html.tostring(self.html, encoding='utf-8', doctype='<!DOCTYPE html>')
with open(os.path.join(self.dest_dir, 'index.html'), 'wb') as f: with open(os.path.join(self.dest_dir, 'index.html'), 'wb') as f:
f.write(raw) f.write(raw)
@ -155,19 +279,48 @@ class Convert(object):
with open(os.path.join(self.dest_dir, 'docx.css'), 'wb') as f: with open(os.path.join(self.dest_dir, 'docx.css'), 'wb') as f:
f.write(css.encode('utf-8')) f.write(css.encode('utf-8'))
opf = OPFCreator(self.dest_dir, self.mi)
opf.toc = toc
opf.create_manifest_from_files_in([self.dest_dir])
opf.create_spine(['index.html'])
with open(os.path.join(self.dest_dir, 'metadata.opf'), 'wb') as of, open(os.path.join(self.dest_dir, 'toc.ncx'), 'wb') as ncx:
opf.render(of, ncx, 'toc.ncx')
return os.path.join(self.dest_dir, 'metadata.opf')
def convert_p(self, p): def convert_p(self, p):
dest = P() dest = P()
self.object_map[dest] = p self.object_map[dest] = p
style = self.styles.resolve_paragraph(p) style = self.styles.resolve_paragraph(p)
self.layers[p] = [] self.layers[p] = []
for run in XPath('descendant::w:r')(p): self.add_frame(dest, style.frame)
span = self.convert_run(run)
current_anchor = None
current_hyperlink = None
for x in descendants(p, 'w:r', 'w:bookmarkStart', 'w:hyperlink'):
if x.tag.endswith('}r'):
span = self.convert_run(x)
if current_anchor is not None:
(dest if len(dest) == 0 else span).set('id', current_anchor)
current_anchor = None
if current_hyperlink is not None:
hl = ancestor(x, 'w:hyperlink')
if hl is not None:
self.link_map[hl].append(span)
else:
current_hyperlink = None
dest.append(span) dest.append(span)
self.layers[p].append(run) self.layers[p].append(x)
elif x.tag.endswith('}bookmarkStart'):
anchor = get(x, 'w:name')
if anchor and anchor not in self.anchor_map:
self.anchor_map[anchor] = current_anchor = generate_anchor(anchor, frozenset(self.anchor_map.itervalues()))
elif x.tag.endswith('}hyperlink'):
current_hyperlink = x
m = re.match(r'heading\s+(\d+)$', style.style_name or '', re.IGNORECASE) m = re.match(r'heading\s+(\d+)$', style.style_name or '', re.IGNORECASE)
if m is not None: if m is not None:
n = min(1, max(6, int(m.group(1)))) n = min(6, max(1, int(m.group(1))))
dest.tag = 'h%d' % n dest.tag = 'h%d' % n
if style.direction == 'rtl': if style.direction == 'rtl':
@ -208,6 +361,31 @@ class Convert(object):
for elem in elems: for elem in elems:
p.remove(elem) p.remove(elem)
wrapper.append(elem) wrapper.append(elem)
return wrapper
def resolve_links(self, relationships_by_id):
    ''' Turn the spans recorded in self.link_map into <a> elements.

    For every hyperlink, its span (or a new wrapper around all of its
    spans, when there are several) is retagged as an anchor. The href is
    taken from relationships_by_id via the r:id attribute, failing that
    from self.anchor_map via the w:anchor attribute, and otherwise set to
    '#' after logging a warning. '''
    for hyperlink, spans in self.link_map.iteritems():
        if len(spans) > 1:
            link = self.wrap_elems(spans, SPAN())
        else:
            link = spans[0]
        link.tag = 'a'

        for attr, source in (('target', 'w:tgtFrame'), ('title', 'w:tooltip')):
            val = get(hyperlink, source)
            if val:
                link.set(attr, val)

        rid = get(hyperlink, 'r:id')
        if rid and rid in relationships_by_id:
            href = relationships_by_id[rid]
        else:
            anchor = get(hyperlink, 'w:anchor')
            if anchor and anchor in self.anchor_map:
                href = '#' + self.anchor_map[anchor]
            else:
                self.log.warn('Hyperlink with unknown target (%s, %s), ignoring' %
                        (rid, anchor))
                href = '#'
        link.set('href', href)
def convert_run(self, run): def convert_run(self, run):
ans = SPAN() ans = SPAN()
@ -239,6 +417,17 @@ class Convert(object):
br = BR() br = BR()
text.add_elem(br) text.add_elem(br)
ans.append(text.elem) ans.append(text.elem)
elif is_tag(child, 'w:drawing') or is_tag(child, 'w:pict'):
for img in self.images.to_html(child, self.current_page, self.docx, self.dest_dir):
text.add_elem(img)
ans.append(text.elem)
elif is_tag(child, 'w:footnoteReference') or is_tag(child, 'w:endnoteReference'):
anchor, name = self.footnotes.get_ref(child)
if anchor and name:
l = SUP(A(name, href='#' + anchor, title=name), id='back_%s' % anchor)
l.set('class', 'noteref')
text.add_elem(l)
ans.append(text.elem)
if text.buf: if text.buf:
setattr(text.elem, text.attr, ''.join(text.buf)) setattr(text.elem, text.attr, ''.join(text.buf))
@ -249,7 +438,39 @@ class Convert(object):
ans.lang = style.lang ans.lang = style.lang
return ans return ans
def add_frame(self, html_obj, style):
    ''' Record html_obj as a member of a frame with the given style.

    self.framed is a list of runs, each run being a list of
    (html_obj, style) tuples for consecutive objects that share the same
    frame style. The last run is the one currently being built.

    :param html_obj: The HTML element to record
    :param style: The frame style of html_obj. The inherit sentinel ends
        the current run without recording html_obj (presumably it means
        the paragraph has no frame of its own -- confirm against the
        styles module). '''
    last_run = self.framed[-1]
    if style is inherit:
        # Close the current run, but never stack up several empty runs
        if last_run:
            self.framed.append([])
        return

    if not last_run or last_run[-1][1] == style:
        # Either the first member of an empty run or a continuation of
        # the current run
        last_run.append((html_obj, style))
    else:
        # A different frame style starts a new run. The run has to be a
        # list of tuples (not a bare tuple), since both apply_frames() and
        # the next call to this method index runs as run[i][j]
        self.framed.append([(html_obj, style)])
def apply_frames(self):
    ''' Wrap every non-empty run in self.framed in a single <div>.

    The div is inserted into the parent of the run's first element, at the
    index that element had. The CSS for the div comes from the style of
    the run's first member and the page properties of the object that
    member was generated from; it is stored in self.framed_map and
    registered with self.styles under the name 'frame'. '''
    for group in self.framed:
        if not group:
            continue
        first = group[0][0]
        frame_style = group[0][1]
        members = tuple(entry[0] for entry in group)
        # Look up the insertion point before DIV() re-parents the members
        container = first.getparent()
        position = container.index(first)
        wrapper = DIV(*members)
        container.insert(position, wrapper)
        css = frame_style.css(self.page_map[self.object_map[first]])
        self.framed_map[wrapper] = css
        self.styles.register(css, 'frame')
if __name__ == '__main__': if __name__ == '__main__':
import shutil
from calibre.utils.logging import default_log from calibre.utils.logging import default_log
default_log.filter_level = default_log.DEBUG default_log.filter_level = default_log.DEBUG
Convert(sys.argv[-1], log=default_log)() dest_dir = os.path.join(os.getcwdu(), 'docx_input')
if os.path.exists(dest_dir):
shutil.rmtree(dest_dir)
os.mkdir(dest_dir)
Convert(sys.argv[-1], dest_dir=dest_dir, log=default_log)()

View File

@ -179,7 +179,7 @@ class Metadata(object):
def deepcopy(self): def deepcopy(self):
''' Do not use this method unless you know what you are doing, if you want to create a simple clone of ''' Do not use this method unless you know what you are doing, if you want to create a simple clone of
this object, use :method:`deepcopy_metadata` instead. ''' this object, use :meth:`deepcopy_metadata` instead. '''
m = Metadata(None) m = Metadata(None)
m.__dict__ = copy.deepcopy(self.__dict__) m.__dict__ = copy.deepcopy(self.__dict__)
object.__setattr__(m, '_data', copy.deepcopy(object.__getattribute__(self, '_data'))) object.__setattr__(m, '_data', copy.deepcopy(object.__getattribute__(self, '_data')))

View File

@ -21,7 +21,7 @@ from calibre.ebooks.metadata.book.base import Metadata
from calibre.utils.date import parse_date, isoformat from calibre.utils.date import parse_date, isoformat
from calibre.utils.localization import get_lang, canonicalize_lang from calibre.utils.localization import get_lang, canonicalize_lang
from calibre import prints, guess_type from calibre import prints, guess_type
from calibre.utils.cleantext import clean_ascii_chars from calibre.utils.cleantext import clean_ascii_chars, clean_xml_chars
from calibre.utils.config import tweaks from calibre.utils.config import tweaks
class Resource(object): # {{{ class Resource(object): # {{{
@ -1436,7 +1436,10 @@ def metadata_to_opf(mi, as_string=True, default_lang=None):
attrib['name'] = name attrib['name'] = name
if content: if content:
attrib['content'] = content attrib['content'] = content
try:
elem = metadata.makeelement(tag, attrib=attrib) elem = metadata.makeelement(tag, attrib=attrib)
except ValueError:
elem = metadata.makeelement(tag, attrib={k:clean_xml_chars(v) for k, v in attrib.iteritems()})
elem.tail = '\n'+(' '*8) elem.tail = '\n'+(' '*8)
if text: if text:
try: try:

View File

@ -100,7 +100,7 @@ def update_flow_links(mobi8_reader, resource_map, log):
mr = mobi8_reader mr = mobi8_reader
flows = [] flows = []
img_pattern = re.compile(r'''(<[img\s|image\s][^>]*>)''', re.IGNORECASE) img_pattern = re.compile(r'''(<[img\s|image\s|svg:image\s][^>]*>)''', re.IGNORECASE)
img_index_pattern = re.compile(r'''['"]kindle:embed:([0-9|A-V]+)[^'"]*['"]''', re.IGNORECASE) img_index_pattern = re.compile(r'''['"]kindle:embed:([0-9|A-V]+)[^'"]*['"]''', re.IGNORECASE)
tag_pattern = re.compile(r'''(<[^>]*>)''') tag_pattern = re.compile(r'''(<[^>]*>)''')
@ -128,7 +128,7 @@ def update_flow_links(mobi8_reader, resource_map, log):
srcpieces = img_pattern.split(flow) srcpieces = img_pattern.split(flow)
for j in range(1, len(srcpieces), 2): for j in range(1, len(srcpieces), 2):
tag = srcpieces[j] tag = srcpieces[j]
if tag.startswith('<im'): if tag.startswith('<im') or tag.startswith('<svg:image'):
for m in img_index_pattern.finditer(tag): for m in img_index_pattern.finditer(tag):
num = int(m.group(1), 32) num = int(m.group(1), 32)
href = resource_map[num-1] href = resource_map[num-1]

View File

@ -228,7 +228,7 @@ class Mobi8Reader(object):
self.flowinfo.append(FlowInfo(None, None, None, None)) self.flowinfo.append(FlowInfo(None, None, None, None))
svg_tag_pattern = re.compile(br'''(<svg[^>]*>)''', re.IGNORECASE) svg_tag_pattern = re.compile(br'''(<svg[^>]*>)''', re.IGNORECASE)
image_tag_pattern = re.compile(br'''(<image[^>]*>)''', re.IGNORECASE) image_tag_pattern = re.compile(br'''(<(?:svg:)?image[^>]*>)''', re.IGNORECASE)
for j in xrange(1, len(self.flows)): for j in xrange(1, len(self.flows)):
flowpart = self.flows[j] flowpart = self.flows[j]
nstr = '%04d' % j nstr = '%04d' % j
@ -243,7 +243,7 @@ class Mobi8Reader(object):
dir = None dir = None
fname = None fname = None
# strip off anything before <svg if inlining # strip off anything before <svg if inlining
flowpart = flowpart[start:] flowpart = re.sub(br'(</?)svg:', r'\1', flowpart[start:])
else: else:
format = 'file' format = 'file'
dir = "images" dir = "images"

View File

@ -51,7 +51,7 @@ def load_html(path, view, codec='utf-8', mime_type=None,
loading_url = QUrl.fromLocalFile(path) loading_url = QUrl.fromLocalFile(path)
pre_load_callback(loading_url) pre_load_callback(loading_url)
if force_as_html or re.search(r'<[:a-zA-Z0-9-]*svg', html) is None: if force_as_html or re.search(r'<[a-zA-Z0-9-]+:svg', html) is None:
view.setHtml(html, loading_url) view.setHtml(html, loading_url)
else: else:
view.setContent(QByteArray(html.encode(codec)), mime_type, view.setContent(QByteArray(html.encode(codec)), mime_type,
@ -61,4 +61,3 @@ def load_html(path, view, codec='utf-8', mime_type=None,
if not elem.isNull(): if not elem.isNull():
return False return False
return True return True

View File

@ -32,7 +32,8 @@ def dynamic_rescale_factor(node):
classes = node.get('class', '').split(' ') classes = node.get('class', '').split(' ')
classes = [x.replace('calibre_rescale_', '') for x in classes if classes = [x.replace('calibre_rescale_', '') for x in classes if
x.startswith('calibre_rescale_')] x.startswith('calibre_rescale_')]
if not classes: return None if not classes:
return None
factor = 1.0 factor = 1.0
for x in classes: for x in classes:
try: try:
@ -54,7 +55,8 @@ class KeyMapper(object):
return base return base
size = float(size) size = float(size)
base = float(base) base = float(base)
if abs(size - base) < 0.1: return 0 if abs(size - base) < 0.1:
return 0
sign = -1 if size < base else 1 sign = -1 if size < base else 1
endp = 0 if size < base else 36 endp = 0 if size < base else 36
diff = (abs(base - size) * 3) + ((36 - size) / 100) diff = (abs(base - size) * 3) + ((36 - size) / 100)
@ -110,7 +112,8 @@ class EmbedFontsCSSRules(object):
self.href = None self.href = None
def __call__(self, oeb): def __call__(self, oeb):
if not self.body_font_family: return None if not self.body_font_family:
return None
if not self.href: if not self.href:
iid, href = oeb.manifest.generate(u'page_styles', u'page_styles.css') iid, href = oeb.manifest.generate(u'page_styles', u'page_styles.css')
rules = [x.cssText for x in self.rules] rules = [x.cssText for x in self.rules]
@ -228,10 +231,10 @@ class CSSFlattener(object):
bs.append('margin-top: 0pt') bs.append('margin-top: 0pt')
bs.append('margin-bottom: 0pt') bs.append('margin-bottom: 0pt')
if float(self.context.margin_left) >= 0: if float(self.context.margin_left) >= 0:
bs.append('margin-left : %gpt'%\ bs.append('margin-left : %gpt'%
float(self.context.margin_left)) float(self.context.margin_left))
if float(self.context.margin_right) >= 0: if float(self.context.margin_right) >= 0:
bs.append('margin-right : %gpt'%\ bs.append('margin-right : %gpt'%
float(self.context.margin_right)) float(self.context.margin_right))
bs.extend(['padding-left: 0pt', 'padding-right: 0pt']) bs.extend(['padding-left: 0pt', 'padding-right: 0pt'])
if self.page_break_on_body: if self.page_break_on_body:
@ -277,8 +280,10 @@ class CSSFlattener(object):
for kind in ('margin', 'padding'): for kind in ('margin', 'padding'):
for edge in ('bottom', 'top'): for edge in ('bottom', 'top'):
property = "%s-%s" % (kind, edge) property = "%s-%s" % (kind, edge)
if property not in cssdict: continue if property not in cssdict:
if '%' in cssdict[property]: continue continue
if '%' in cssdict[property]:
continue
value = style[property] value = style[property]
if value == 0: if value == 0:
continue continue
@ -366,6 +371,11 @@ class CSSFlattener(object):
is_drop_cap = (cssdict.get('float', None) == 'left' and 'font-size' in is_drop_cap = (cssdict.get('float', None) == 'left' and 'font-size' in
cssdict and len(node) == 0 and node.text and cssdict and len(node) == 0 and node.text and
len(node.text) == 1) len(node.text) == 1)
is_drop_cap = is_drop_cap or (
# The docx input plugin generates drop caps that look like this
len(node) == 1 and not node.text and len(node[0]) == 0 and
node[0].text and not node[0].tail and len(node[0].text) == 1 and
'line-height' in cssdict and 'font-size' in cssdict)
if not self.context.disable_font_rescaling and not is_drop_cap: if not self.context.disable_font_rescaling and not is_drop_cap:
_sbase = self.sbase if self.sbase is not None else \ _sbase = self.sbase if self.sbase is not None else \
self.context.source.fbase self.context.source.fbase
@ -436,8 +446,7 @@ class CSSFlattener(object):
keep_classes = set() keep_classes = set()
if cssdict: if cssdict:
items = cssdict.items() items = sorted(cssdict.items())
items.sort()
css = u';\n'.join(u'%s: %s' % (key, val) for key, val in items) css = u';\n'.join(u'%s: %s' % (key, val) for key, val in items)
classes = node.get('class', '').strip() or 'calibre' classes = node.get('class', '').strip() or 'calibre'
klass = ascii_text(STRIPNUM.sub('', classes.split()[0].replace('_', ''))) klass = ascii_text(STRIPNUM.sub('', classes.split()[0].replace('_', '')))
@ -519,8 +528,7 @@ class CSSFlattener(object):
if float(self.context.margin_bottom) >= 0: if float(self.context.margin_bottom) >= 0:
stylizer.page_rule['margin-bottom'] = '%gpt'%\ stylizer.page_rule['margin-bottom'] = '%gpt'%\
float(self.context.margin_bottom) float(self.context.margin_bottom)
items = stylizer.page_rule.items() items = sorted(stylizer.page_rule.items())
items.sort()
css = ';\n'.join("%s: %s" % (key, val) for key, val in items) css = ';\n'.join("%s: %s" % (key, val) for key, val in items)
css = ('@page {\n%s\n}\n'%css) if items else '' css = ('@page {\n%s\n}\n'%css) if items else ''
rules = [r.cssText for r in stylizer.font_face_rules + rules = [r.cssText for r in stylizer.font_face_rules +
@ -556,14 +564,14 @@ class CSSFlattener(object):
body = html.find(XHTML('body')) body = html.find(XHTML('body'))
fsize = self.context.dest.fbase fsize = self.context.dest.fbase
self.flatten_node(body, stylizer, names, styles, pseudo_styles, fsize, item.id) self.flatten_node(body, stylizer, names, styles, pseudo_styles, fsize, item.id)
items = [(key, val) for (val, key) in styles.items()] items = sorted([(key, val) for (val, key) in styles.items()])
items.sort()
# :hover must come after link and :active must come after :hover # :hover must come after link and :active must come after :hover
psels = sorted(pseudo_styles.iterkeys(), key=lambda x : psels = sorted(pseudo_styles.iterkeys(), key=lambda x :
{'hover':1, 'active':2}.get(x, 0)) {'hover':1, 'active':2}.get(x, 0))
for psel in psels: for psel in psels:
styles = pseudo_styles[psel] styles = pseudo_styles[psel]
if not styles: continue if not styles:
continue
x = sorted(((k+':'+psel, v) for v, k in styles.iteritems())) x = sorted(((k+':'+psel, v) for v, k in styles.iteritems()))
items.extend(x) items.extend(x)

View File

@ -375,6 +375,8 @@ class FlowSplitter(object):
for img in root.xpath('//h:img', namespaces=NAMESPACES): for img in root.xpath('//h:img', namespaces=NAMESPACES):
if img.get('style', '') != 'display:none': if img.get('style', '') != 'display:none':
return False return False
if root.xpath('//*[local-name() = "svg"]'):
return False
return True return True
def split_text(self, text, root, size): def split_text(self, text, root, size):

View File

@ -161,13 +161,15 @@ class ChooseLibraryAction(InterfaceAction):
def genesis(self): def genesis(self):
self.base_text = _('%d books') self.base_text = _('%d books')
self.count_changed(0) self.count_changed(0)
self.qaction.triggered.connect(self.choose_library,
type=Qt.QueuedConnection)
self.action_choose = self.menuless_qaction self.action_choose = self.menuless_qaction
self.stats = LibraryUsageStats() self.stats = LibraryUsageStats()
self.popup_type = (QToolButton.InstantPopup if len(self.stats.stats) > 1 else self.popup_type = (QToolButton.InstantPopup if len(self.stats.stats) > 1 else
QToolButton.MenuButtonPopup) QToolButton.MenuButtonPopup)
if len(self.stats.stats) > 1:
self.action_choose.triggered.connect(self.choose_library)
else:
self.qaction.triggered.connect(self.choose_library)
self.choose_menu = self.qaction.menu() self.choose_menu = self.qaction.menu()
@ -200,7 +202,6 @@ class ChooseLibraryAction(InterfaceAction):
type=Qt.QueuedConnection) type=Qt.QueuedConnection)
self.choose_menu.addAction(ac) self.choose_menu.addAction(ac)
self.rename_separator = self.choose_menu.addSeparator() self.rename_separator = self.choose_menu.addSeparator()
self.maintenance_menu = QMenu(_('Library Maintenance')) self.maintenance_menu = QMenu(_('Library Maintenance'))
@ -477,19 +478,20 @@ class ChooseLibraryAction(InterfaceAction):
else: else:
return return
#from calibre.utils.mem import memory # from calibre.utils.mem import memory
#import weakref # import weakref
#from PyQt4.Qt import QTimer # from PyQt4.Qt import QTimer
#self.dbref = weakref.ref(self.gui.library_view.model().db) # self.dbref = weakref.ref(self.gui.library_view.model().db)
#self.before_mem = memory()/1024**2 # self.before_mem = memory()/1024**2
self.gui.library_moved(loc, allow_rebuild=True) self.gui.library_moved(loc, allow_rebuild=True)
#QTimer.singleShot(5000, self.debug_leak) # QTimer.singleShot(5000, self.debug_leak)
def debug_leak(self): def debug_leak(self):
import gc import gc
from calibre.utils.mem import memory from calibre.utils.mem import memory
ref = self.dbref ref = self.dbref
for i in xrange(3): gc.collect() for i in xrange(3):
gc.collect()
if ref() is not None: if ref() is not None:
print 'DB object alive:', ref() print 'DB object alive:', ref()
for r in gc.get_referrers(ref())[:10]: for r in gc.get_referrers(ref())[:10]:
@ -500,7 +502,6 @@ class ChooseLibraryAction(InterfaceAction):
print print
self.dbref = self.before_mem = None self.dbref = self.before_mem = None
def qs_requested(self, idx, *args): def qs_requested(self, idx, *args):
self.switch_requested(self.qs_locations[idx]) self.switch_requested(self.qs_locations[idx])
@ -546,3 +547,4 @@ class ChooseLibraryAction(InterfaceAction):
return False return False
return True return True

View File

@ -907,7 +907,7 @@ class BooksModel(QAbstractTableModel): # {{{
if ht == 'timestamp': # change help text because users know this field as 'date' if ht == 'timestamp': # change help text because users know this field as 'date'
ht = 'date' ht = 'date'
if self.db.field_metadata[self.column_map[section]]['is_category']: if self.db.field_metadata[self.column_map[section]]['is_category']:
is_cat = '.\n\n' + _('Click in this column and press Q to to Quickview books with the same %s' % ht) is_cat = '.\n\n' + _('Click in this column and press Q to Quickview books with the same %s' % ht)
else: else:
is_cat = '' is_cat = ''
return QVariant(_('The lookup/search name is "{0}"{1}').format(ht, is_cat)) return QVariant(_('The lookup/search name is "{0}"{1}').format(ht, is_cat))
@ -1029,7 +1029,7 @@ class BooksModel(QAbstractTableModel): # {{{
return False return False
val = (int(value.toInt()[0]) if column == 'rating' else val = (int(value.toInt()[0]) if column == 'rating' else
value.toDateTime() if column in ('timestamp', 'pubdate') value.toDateTime() if column in ('timestamp', 'pubdate')
else unicode(value.toString()).strip()) else re.sub(ur'\s', u' ', unicode(value.toString()).strip()))
id = self.db.id(row) id = self.db.id(row)
books_to_refresh = set([id]) books_to_refresh = set([id])
if column == 'rating': if column == 'rating':

View File

@ -45,6 +45,9 @@ def save_dialog(parent, title, msg, det_msg=''):
d.setStandardButtons(QMessageBox.Yes | QMessageBox.No | QMessageBox.Cancel) d.setStandardButtons(QMessageBox.Yes | QMessageBox.No | QMessageBox.Cancel)
return d.exec_() return d.exec_()
def clean_text(x):
    ''' Strip x and replace every whitespace character that remains (tabs,
    newlines, etc.) with a plain space, one for one. '''
    trimmed = x.strip()
    return re.sub(r'\s', ' ', trimmed)
''' '''
The interface common to all widgets used to set basic metadata The interface common to all widgets used to set basic metadata
class BasicMetadataWidget(object): class BasicMetadataWidget(object):
@ -117,7 +120,7 @@ class TitleEdit(EnLineEdit):
def current_val(self): def current_val(self):
def fget(self): def fget(self):
title = unicode(self.text()).strip() title = clean_text(unicode(self.text()))
if not title: if not title:
title = self.get_default() title = self.get_default()
return title return title
@ -289,7 +292,7 @@ class AuthorsEdit(EditWithComplete):
def current_val(self): def current_val(self):
def fget(self): def fget(self):
au = unicode(self.text()).strip() au = clean_text(unicode(self.text()))
if not au: if not au:
au = self.get_default() au = self.get_default()
return string_to_authors(au) return string_to_authors(au)
@ -352,7 +355,7 @@ class AuthorSortEdit(EnLineEdit):
def current_val(self): def current_val(self):
def fget(self): def fget(self):
return unicode(self.text()).strip() return clean_text(unicode(self.text()))
def fset(self, val): def fset(self, val):
if not val: if not val:
@ -472,7 +475,7 @@ class SeriesEdit(EditWithComplete):
def current_val(self): def current_val(self):
def fget(self): def fget(self):
return unicode(self.currentText()).strip() return clean_text(unicode(self.currentText()))
def fset(self, val): def fset(self, val):
if not val: if not val:
@ -1135,7 +1138,7 @@ class TagsEdit(EditWithComplete): # {{{
@dynamic_property @dynamic_property
def current_val(self): def current_val(self):
def fget(self): def fget(self):
return [x.strip() for x in unicode(self.text()).split(',')] return [clean_text(x) for x in unicode(self.text()).split(',')]
def fset(self, val): def fset(self, val):
if not val: if not val:
val = [] val = []
@ -1237,7 +1240,7 @@ class IdentifiersEdit(QLineEdit): # {{{
def current_val(self): def current_val(self):
def fget(self): def fget(self):
raw = unicode(self.text()).strip() raw = unicode(self.text()).strip()
parts = [x.strip() for x in raw.split(',')] parts = [clean_text(x) for x in raw.split(',')]
ans = {} ans = {}
for x in parts: for x in parts:
c = x.split(':') c = x.split(':')
@ -1376,7 +1379,7 @@ class PublisherEdit(EditWithComplete): # {{{
def current_val(self): def current_val(self):
def fget(self): def fget(self):
return unicode(self.currentText()).strip() return clean_text(unicode(self.currentText()))
def fset(self, val): def fset(self, val):
if not val: if not val:

View File

@ -146,8 +146,12 @@ class CreateVirtualLibrary(QDialog): # {{{
<p>For example you can use a Virtual Library to only show you books with the Tag <i>"Unread"</i> <p>For example you can use a Virtual Library to only show you books with the Tag <i>"Unread"</i>
or only books by <i>"My Favorite Author"</i> or only books in a particular series.</p> or only books by <i>"My Favorite Author"</i> or only books in a particular series.</p>
<p>More information and examples are available in the
<a href="http://manual.calibre-ebook.com/virtual_libraries.html">User Manual</a>.</p>
''')) '''))
hl.setWordWrap(True) hl.setWordWrap(True)
hl.setOpenExternalLinks(True)
hl.setFrameStyle(hl.StyledPanel) hl.setFrameStyle(hl.StyledPanel)
gl.addWidget(hl, 0, 3, 4, 1) gl.addWidget(hl, 0, 3, 4, 1)

View File

@ -41,7 +41,6 @@ class JavaScriptLoader(object):
'hyphenation', 'hyphenator', 'utils', 'cfi', 'indexing', 'paged', 'hyphenation', 'hyphenator', 'utils', 'cfi', 'indexing', 'paged',
'fs', 'math', 'extract') 'fs', 'math', 'extract')
def __init__(self, dynamic_coffeescript=False): def __init__(self, dynamic_coffeescript=False):
self._dynamic_coffeescript = dynamic_coffeescript self._dynamic_coffeescript = dynamic_coffeescript
if self._dynamic_coffeescript: if self._dynamic_coffeescript:
@ -68,6 +67,7 @@ class JavaScriptLoader(object):
allow_user_override=False).decode('utf-8') allow_user_override=False).decode('utf-8')
else: else:
dynamic = (self._dynamic_coffeescript and dynamic = (self._dynamic_coffeescript and
calibre.__file__ and not calibre.__file__.endswith('.pyo') and
os.path.exists(calibre.__file__)) os.path.exists(calibre.__file__))
ans = compiled_coffeescript(src, dynamic=dynamic).decode('utf-8') ans = compiled_coffeescript(src, dynamic=dynamic).decode('utf-8')
self._cache[name] = ans self._cache[name] = ans
@ -105,4 +105,3 @@ class JavaScriptLoader(object):
evaljs('\n\n'.join(self._hp_cache.itervalues())) evaljs('\n\n'.join(self._hp_cache.itervalues()))
return lang return lang

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

Some files were not shown because too many files have changed in this diff Show More