Merge from trunk

This commit is contained in:
Charles Haley 2013-05-24 15:56:52 +02:00
commit 00c1d4ea54
125 changed files with 33062 additions and 27652 deletions

View File

@ -20,6 +20,56 @@
# new recipes:
# - title:
- version: 0.9.32
date: 2013-05-24
new features:
- title: "Show the number of currently selected books in the status bar at the bottom of the book list"
- title: "Driver for PocketBook Touch 623 and Yarvik tablet Xenta 13c"
tickets: [1182850, 1181669]
- title: "When editing dates such as published, allow pressing the minus key to clear the date and the = key to set the date to today."
tickets: [1181449]
bug fixes:
- title: "EPUB/AZW3 Output: Fix regression that caused errors when trying to convert documents that have URLs with invalid (non-utf-8) quoting."
tickets: [1181049]
- title: "When backing up metadata automatically remove XML invalid chars, instead of erroring out"
- title: "ebook-viewer: Fix --debug-javascript option causing an error when running from a binary build on os x and linux"
- title: "Fix switch library dialog and menu both popping up when clicking the library button in some window managers"
- title: "Apple driver: Fix a regression in 0.9.31 that could cause sending books to the device to hang"
- title: "When setting metadata using the edit metadata dialog, convert newlines, tabs etc. to normal spaces"
tickets: [1182268]
- title: "EPUB/AZW3 Output: Fix pages that contain only an svg image being regarded as empty and removed during splitting"
- title: "AZW3 Input: Handle files that use unnecessary svg: prefixes."
tickets: [1182257]
- title: "EPUB Input: Handle EPUB files that have no <metadata> section in their OPF."
tickets: [1181546]
- title: "Get Books: Fix Foyles UK store plugin."
tickets: [1181494]
improved recipes:
- Wall Street Journal
- Various Polish news sources
- Handelsblatt
- The Australian
- Las Vegas Review
- NME
new recipes:
- title: WirtschaftsWoche Online
author: Hegi
- version: 0.9.31
date: 2013-05-17

View File

@ -57,6 +57,26 @@ library. The virtual library will then be created based on the search
you just typed in. Searches are very powerful; for examples of the kinds
of things you can do with them, see :ref:`search_interface`.
Examples of useful Virtual Libraries
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
* Books added to |app| in the last day::
date:>1daysago
* Books added to |app| in the last month::
date:>30daysago
* Books with a rating of 5 stars::
rating:5
* Books with a rating of at least 4 stars::
rating:>=4
* Books with no rating::
rating:false
* Periodicals downloaded by the Fetch News function in |app|::
tags:=News and author:=calibre
* Books with no tags::
tags:false
* Books with no covers::
cover:false
Working with Virtual Libraries
-------------------------------------

View File

@ -1,47 +1,24 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
class Adventure_zone(BasicNewsRecipe):
title = u'Adventure Zone'
__author__ = 'fenuks'
description = u'Czytaj więcej o przygodzie - codzienne nowinki. Szukaj u nas solucji i poradników, czytaj recenzje i zapowiedzi. Także galeria, pliki oraz forum dla wszystkich fanów gier przygodowych.'
category = 'games'
language = 'pl'
BASEURL = 'http://www.adventure-zone.info/fusion/'
no_stylesheets = True
extra_css = '.image {float: left; margin-right: 5px;}'
oldest_article = 20
max_articles_per_feed = 100
cover_url = 'http://www.adventure-zone.info/inne/logoaz_2012.png'
index = 'http://www.adventure-zone.info/fusion/'
remove_attributes = ['style']
use_embedded_content = False
preprocess_regexps = [(re.compile(r"<td class='capmain'>Komentarze</td>", re.IGNORECASE), lambda m: ''),
(re.compile(r'</?table.*?>'), lambda match: ''),
(re.compile(r'</?tbody.*?>'), lambda match: '')]
remove_tags_before = dict(name='td', attrs={'class':'main-bg'})
remove_tags = [dict(name='img', attrs={'alt':'Drukuj'})]
remove_tags_after = dict(id='comments')
extra_css = '.main-bg{text-align: left;} td.capmain{ font-size: 22px; } img.news-category {float: left; margin-right: 5px;}'
feeds = [(u'Nowinki', u'http://www.adventure-zone.info/fusion/feeds/news.php')]
'''def get_cover_url(self):
soup = self.index_to_soup('http://www.adventure-zone.info/fusion/news.php')
cover=soup.find(id='box_OstatninumerAZ')
self.cover_url='http://www.adventure-zone.info/fusion/'+ cover.center.a.img['src']
return getattr(self, 'cover_url', self.cover_url)'''
def populate_article_metadata(self, article, soup, first):
result = re.search('(.+) - Adventure Zone', soup.title.string)
if result:
result = result.group(1)
else:
result = soup.body.find('strong')
if result:
result = result.string
if result:
result = result.replace('&amp;', '&')
result = result.replace('&#39;', '')
article.title = result
keep_only_tags = [dict(attrs={'class':'content'})]
remove_tags = [dict(attrs={'class':'footer'})]
feeds = [(u'Nowinki', u'http://www.adventure-zone.info/fusion/rss/index.php')]
def skip_ad_pages(self, soup):
skip_tag = soup.body.find(name='td', attrs={'class':'main-bg'})
skip_tag = soup.body.find(attrs={'class':'content'})
skip_tag = skip_tag.findAll(name='a')
title = soup.title.string.lower()
if (('zapowied' in title) or ('recenzj' in title) or ('solucj' in title) or ('poradnik' in title)):
@ -49,20 +26,10 @@ class Adventure_zone(BasicNewsRecipe):
if r.strong and r.strong.string:
word=r.strong.string.lower()
if (('zapowied' in word) or ('recenzj' in word) or ('solucj' in word) or ('poradnik' in word)):
return self.index_to_soup('http://www.adventure-zone.info/fusion/print.php?type=A&item'+r['href'][r['href'].find('article_id')+7:], raw=True)
return self.index_to_soup(self.BASEURL+r['href'], raw=True)
def preprocess_html(self, soup):
footer=soup.find(attrs={'class':'news-footer middle-border'})
r = soup.find(name='td', attrs={'class':'capmain'})
if r:
r.name='h1'
for item in soup.findAll(name=['tr', 'td']):
item.name='div'
if footer and len(footer('a'))>=2:
footer('a')[1].extract()
for item in soup.findAll(style=True):
del item['style']
for a in soup('a'):
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
a['href']=self.index + a['href']
for link in soup.findAll('a', href=True):
if not link['href'].startswith('http'):
link['href'] = self.BASEURL + link['href']
return soup

View File

@ -13,6 +13,7 @@ class Astroflesz(BasicNewsRecipe):
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
remove_empty_feeds = True
remove_attributes = ['style']
keep_only_tags = [dict(id="k2Container")]
remove_tags_after = dict(name='div', attrs={'class':'itemLinks'})

View File

@ -6,12 +6,10 @@ __copyright__ = '2011, Piotr Kontek, piotr.kontek@gmail.com \
2013, Tomasz Długosz, tomek3d@gmail.com'
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ptempfile import PersistentTemporaryFile
from datetime import date
import re
from lxml import html
class GN(BasicNewsRecipe):
EDITION = 0
__author__ = 'Piotr Kontek, Tomasz Długosz'
title = u'Gość Niedzielny'
@ -20,83 +18,23 @@ class GN(BasicNewsRecipe):
no_stylesheets = True
language = 'pl'
remove_javascript = True
temp_files = []
articles_are_obfuscated = True
def find_last_issue(self):
raw = self.index_to_soup('http://gosc.pl/wyszukaj/wydania/3.Gosc-Niedzielny/', raw=True)
doc = html.fromstring(raw)
page = doc.xpath('//div[@class="c"]//div[@class="search-result"]/div[1]/div[2]/h1//a/@href')
def get_obfuscated_article(self, url):
br = self.get_browser()
br.open(url)
source = br.response().read()
page = self.index_to_soup(source)
main_section = page.find('div',attrs={'class':'txt doc_prnt_prv'})
title = main_section.find('h2')
info = main_section.find('div', attrs={'class' : 'cf doc_info'})
authors = info.find(attrs={'class':'l'})
article = str(main_section.find('p', attrs={'class' : 'doc_lead'}))
first = True
for p in main_section.findAll('p', attrs={'class':None}, recursive=False):
if first and p.find('img') != None:
article += '<p>'
article += str(p.find('img')).replace('src="/files/','src="http://www.gosc.pl/files/')
article += '<font size="-2">'
for s in p.findAll('span'):
article += self.tag_to_string(s)
article += '</font></p>'
else:
article += str(p).replace('src="/files/','src="http://www.gosc.pl/files/')
first = False
limiter = main_section.find('p', attrs={'class' : 'limiter'})
if limiter:
article += str(limiter)
html = unicode(title)
#sometimes authors are not filled in:
if authors:
html += unicode(authors) + unicode(article)
else:
html += unicode(article)
self.temp_files.append(PersistentTemporaryFile('_temparse.html'))
self.temp_files[-1].write(html)
self.temp_files[-1].close()
return self.temp_files[-1].name
def find_last_issue(self, year):
soup = self.index_to_soup('http://gosc.pl/wyszukaj/wydania/3.Gosc-Niedzielny/rok/' + str(year))
#szukam zdjęcia i linka do poprzedniego pełnego numeru
first = True
for d in soup.findAll('div', attrs={'class':'l release_preview_l'}):
img = d.find('img')
if img != None:
a = img.parent
self.EDITION = a['href']
#this was preventing kindles from moving old issues to 'Back Issues' category:
#self.title = img['alt']
self.cover_url = 'http://www.gosc.pl' + img['src']
if year != date.today().year or not first:
break
first = False
return page[1]
def parse_index(self):
year = date.today().year
self.find_last_issue(year)
##jeśli to pierwszy numer w roku trzeba pobrać poprzedni rok
if self.EDITION == 0:
self.find_last_issue(year-1)
soup = self.index_to_soup('http://www.gosc.pl' + self.EDITION)
soup = self.index_to_soup('http://gosc.pl' + self.find_last_issue())
feeds = []
#wstepniak
a = soup.find('div',attrs={'class':'release-wp-b'}).find('a')
articles = [
{'title' : self.tag_to_string(a),
'url' : 'http://www.gosc.pl' + a['href'].replace('/doc/','/doc_pr/'),
'date' : '',
'description' : ''}
]
'url' : 'http://www.gosc.pl' + a['href'].replace('/doc/','/doc_pr/')
}]
feeds.append((u'Wstępniak',articles))
#kategorie
for addr in soup.findAll('a',attrs={'href':re.compile('kategoria')}):
@ -113,16 +51,46 @@ class GN(BasicNewsRecipe):
art = a.find('a')
yield {
'title' : self.tag_to_string(art),
'url' : 'http://www.gosc.pl' + art['href'].replace('/doc/','/doc_pr/'),
'date' : '',
'description' : ''
'url' : 'http://www.gosc.pl' + art['href']
}
for a in main_block.findAll('div', attrs={'class':'sr-document'}):
art = a.find('a')
yield {
'title' : self.tag_to_string(art),
'url' : 'http://www.gosc.pl' + art['href'].replace('/doc/','/doc_pr/'),
'date' : '',
'description' : ''
'url' : 'http://www.gosc.pl' + art['href']
}
def append_page(self, soup, appendtag):
chpage= appendtag.find(attrs={'class':'pgr_nrs'})
if chpage:
for page in chpage.findAll('a'):
soup2 = self.index_to_soup('http://gosc.pl' + page['href'])
pagetext = soup2.find(attrs={'class':'intextAd'})
pos = len(appendtag.contents)
appendtag.insert(pos, pagetext)
def preprocess_html(self, soup):
self.append_page(soup, soup.body)
'''
for image_div in soup.findAll(attrs={'class':'doc_image'}):
link =
if 'm.jpg' in image['src']:
image['src'] = image['src'].replace('m.jpg', '.jpg')
'''
return soup
keep_only_tags = [
dict(name='div', attrs={'class':'cf txt'})
]
remove_tags = [
dict(name='p', attrs={'class':['r tr', 'l l-2', 'wykop']}),
dict(name='div', attrs={'class':['doc_actions', 'pgr', 'fr1_cl']}),
dict(name='div', attrs={'id':'vote'})
]
extra_css = '''
h1 {font-size:150%}
div#doc_image {font-style:italic; font-size:70%}
p.limiter {font-size:150%; font-weight: bold}
'''

View File

@ -1,16 +1,61 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
class Handelsblatt(BasicNewsRecipe):
title = u'Handelsblatt'
__author__ = 'malfi'
oldest_article = 7
__author__ = 'malfi' # modified by Hegi, last change 2013-05-20
description = u'Handelsblatt - basierend auf den RRS-Feeds von Handelsblatt.de'
tags = 'Nachrichten, Blog, Wirtschaft'
publisher = 'Verlagsgruppe Handelsblatt GmbH'
category = 'business, economy, news, Germany'
publication_type = 'daily newspaper'
language = 'de_DE'
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True
# cover_url = 'http://www.handelsblatt.com/images/logo/logo_handelsblatt.com.png'
language = 'de'
simultaneous_downloads= 20
remove_tags_before = dict(attrs={'class':'hcf-overline'})
remove_tags_after = dict(attrs={'class':'hcf-footer'})
auto_cleanup = False
no_stylesheets = True
remove_javascript = True
remove_empty_feeds = True
# don't duplicate articles from "Schlagzeilen" / "Exklusiv" to other rubrics
ignore_duplicate_articles = {'title', 'url'}
# if you want to reduce size for an b/w or E-ink device, uncomment this:
# compress_news_images = True
# compress_news_images_auto_size = 16
# scale_news_images = (400,300)
timefmt = ' [%a, %d %b %Y]'
conversion_options = {'smarten_punctuation' : True,
'authors' : publisher,
'publisher' : publisher}
language = 'de_DE'
encoding = 'UTF-8'
cover_source = 'http://www.handelsblatt-shop.com/epaper/482/'
# masthead_url = 'http://www.handelsblatt.com/images/hb_logo/6543086/1-format3.jpg'
masthead_url = 'http://www.handelsblatt-chemie.de/wp-content/uploads/2012/01/hb-logo.gif'
def get_cover_url(self):
cover_source_soup = self.index_to_soup(self.cover_source)
preview_image_div = cover_source_soup.find(attrs={'class':'vorschau'})
return 'http://www.handelsblatt-shop.com'+preview_image_div.a.img['src']
# remove_tags_before = dict(attrs={'class':'hcf-overline'})
# remove_tags_after = dict(attrs={'class':'hcf-footer'})
# Alternatively use this:
keep_only_tags = [
dict(name='div', attrs={'class':['hcf-column hcf-column1 hcf-teasercontainer hcf-maincol']}),
dict(name='div', attrs={'id':['contentMain']})
]
remove_tags = [
dict(name='div', attrs={'class':['hcf-link-block hcf-faq-open', 'hcf-article-related']})
]
feeds = [
(u'Handelsblatt Exklusiv',u'http://www.handelsblatt.com/rss/exklusiv'),
@ -25,15 +70,19 @@ class Handelsblatt(BasicNewsRecipe):
(u'Handelsblatt Weblogs',u'http://www.handelsblatt.com/rss/blogs')
]
extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''
# Insert ". " after "Place" in <span class="hcf-location-mark">Place</span>
# If you use .epub format you could also do this as extra_css '.hcf-location-mark:after {content: ". "}'
preprocess_regexps = [(re.compile(r'(<span class="hcf-location-mark">[^<]*)(</span>)',
re.DOTALL|re.IGNORECASE), lambda match: match.group(1) + '. ' + match.group(2))]
extra_css = 'h1 {font-size: 1.6em; text-align: left} \
h2 {font-size: 1em; font-style: italic; font-weight: normal} \
h3 {font-size: 1.3em;text-align: left} \
h4, h5, h6, a {font-size: 1em;text-align: left} \
.hcf-caption {font-size: 1em;text-align: left; font-style: italic} \
.hcf-location-mark {font-style: italic}'
def print_version(self, url):
url = url.split('/')
url[-1] = 'v_detail_tab_print,'+url[-1]
url = '/'.join(url)
return url
main, sep, id = url.rpartition('/')
return main + '/v_detail_tab_print/' + id

View File

@ -13,11 +13,12 @@ class Histmag(BasicNewsRecipe):
__author__ = 'matek09'
description = u"Artykuly historyczne i publicystyczne"
encoding = 'utf-8'
extra_css = '''.center img {display: block;}'''
#preprocess_regexps = [(re.compile(r'</span>'), lambda match: '</span><br><br>'),(re.compile(r'<span>'), lambda match: '<br><br><span>')]
no_stylesheets = True
language = 'pl'
remove_javascript = True
keep_only_tags=[dict(id='article')]
remove_tags=[dict(name = 'p', attrs = {'class' : 'article-tags'})]
remove_tags=[dict(name = 'p', attrs = {'class' : 'article-tags'}), dict(attrs={'class':'twitter-share-button'})]
feeds = [(u'Wszystkie', u'http://histmag.org/rss/wszystkie.xml'), (u'Wydarzenia', u'http://histmag.org/rss/wydarzenia.xml'), (u'Recenzje', u'http://histmag.org/rss/recenzje.xml'), (u'Artykuły historyczne', u'http://histmag.org/rss/historia.xml'), (u'Publicystyka', u'http://histmag.org/rss/publicystyka.xml')]

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.5 KiB

BIN
recipes/icons/gs24_pl.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 428 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 541 B

BIN
recipes/icons/pc_lab.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 697 B

BIN
recipes/icons/polityka.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 346 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 418 B

View File

@ -20,7 +20,7 @@ class OSNewsRecipe(BasicNewsRecipe):
remove_javascript = True
encoding = 'utf-8'
use_embedded_content = False;
remove_empty_feeds = True
oldest_article = 7
max_articles_per_feed = 100
cover_url='http://osnews.pl/wp-content/themes/osnews/img/logo.png'
@ -31,22 +31,18 @@ class OSNewsRecipe(BasicNewsRecipe):
'''
feeds = [
(u'OSNews.pl', u'http://feeds.feedburner.com/OSnewspl')
(u'Niusy', u'http://feeds.feedburner.com/OSnewspl'),
(u'Wylęgarnia', u'http://feeds.feedburner.com/osnewspl_nowe')
]
keep_only_tags = [
dict(name = 'a', attrs = {'class' : 'news-heading'}),
dict(name = 'div', attrs = {'class' : 'newsinformations'}),
dict(name = 'div', attrs = {'id' : 'news-content'})
dict(name = 'div', attrs = {'id' : 'content'})
]
remove_tags = [
dict(name = 'div', attrs = {'class' : 'sociable'}),
dict(name = 'div', attrs = {'class' : 'post_prev'}),
dict(name = 'div', attrs = {'class' : 'post_next'}),
dict(name = 'div', attrs = {'class' : 'clr'}),
dict(name = 'div', attrs = {'class' : 'tw_button'}),
dict(name = 'div', attrs = {'style' : 'width:56px;height:60px;float:left;margin-right:10px'})
dict(name = 'div', attrs = {'class' : ['newstags', 'tw_button', 'post_prev']}),
dict(name = 'div', attrs = {'id' : 'newspage_upinfo'}),
]
preprocess_regexps = [(re.compile(u'</span>Komentarze: \(?[0-9]+\)? ?<span'), lambda match: '</span><span')]
remove_tags_after = dict(name = 'div', attrs = {'class' : 'post_prev'})
preprocess_regexps = [(re.compile(u'</span>Komentarze: \(?[0-9]+\)? ?<span'), lambda match: '</span><span'), (re.compile(u'<iframe.+?</iframe>'), lambda match: '')]

View File

@ -0,0 +1,86 @@
__license__ = 'GPL v3'
__copyright__ = '2013, Armin Geller'
'''
Fetch WirtschaftsWoche Online
'''
import re
# import time
from calibre.web.feeds.news import BasicNewsRecipe
class WirtschaftsWocheOnline(BasicNewsRecipe):
    # Recipe for WirtschaftsWoche Online (wiwo.de), driven by the site's RSS
    # feeds. print_version() rewrites every article URL to its
    # '/v_detail_tab_print/' variant, and get_cover_url() scrapes the cover
    # image from the publisher's shop page.

    title = u'WirtschaftsWoche Online'
    __author__ = 'Hegi'  # Update AGE 2013-01-05; Modified by Hegi 2013-04-28
    description = u'Wirtschaftswoche Online - basierend auf den RRS-Feeds von Wiwo.de'
    tags = 'Nachrichten, Blog, Wirtschaft'
    publisher = 'Verlagsgruppe Handelsblatt GmbH / Redaktion WirtschaftsWoche Online'
    category = 'business, economy, news, Germany'
    publication_type = 'weekly magazine'

    # The class used to assign ``language`` twice ('de' first, 'de_DE' later);
    # only the last assignment ever took effect, so that value is kept here.
    # NOTE(review): confirm 'de_DE' (rather than 'de') is the intended code.
    language = 'de_DE'
    encoding = 'UTF-8'

    oldest_article = 7
    max_articles_per_feed = 100
    simultaneous_downloads = 20

    auto_cleanup = False
    no_stylesheets = True
    remove_javascript = True
    remove_empty_feeds = True

    # don't duplicate articles from "Schlagzeilen" / "Exklusiv" to other rubrics
    ignore_duplicate_articles = {'title', 'url'}

    # if you want to reduce size for a b/w or E-ink device, uncomment this:
    # compress_news_images = True
    # compress_news_images_auto_size = 16
    # scale_news_images = (400,300)

    timefmt = ' [%a, %d %b %Y]'

    # ``publisher`` is reused for both the author and publisher metadata.
    conversion_options = {
        'smarten_punctuation': True,
        'authors': publisher,
        'publisher': publisher,
    }

    # Shop page that get_cover_url() scrapes for the current cover image.
    cover_source = 'http://www.wiwo-shop.de/wirtschaftswoche/wirtschaftswoche-emagazin-p1952.html'
    masthead_url = 'http://www.wiwo.de/images/wiwo_logo/5748610/1-formatOriginal.png'

    def get_cover_url(self):
        # Return the absolute URL of the cover preview image found on
        # ``cover_source``, or None when the expected
        # <... class="container vorschau"><a><img src=...> structure is not
        # present (for example after a redesign of the shop page).
        cover_source_soup = self.index_to_soup(self.cover_source)
        preview_image_div = cover_source_soup.find(attrs={'class': 'container vorschau'})
        try:
            image_path = preview_image_div.a.img['src']
        except (AttributeError, KeyError, TypeError):
            # A missing container, link, image or src attribute means there is
            # no cover to report.
            return None
        return 'http://www.wiwo-shop.de' + image_path

    # Insert ". " after "Place" in <span class="hcf-location-mark">Place</span>
    # If you use .epub format you could also do this as extra_css '.hcf-location-mark:after {content: ". "}'
    preprocess_regexps = [
        (re.compile(r'(<span class="hcf-location-mark">[^<]*)(</span>)',
                    re.DOTALL | re.IGNORECASE),
         lambda match: match.group(1) + '. ' + match.group(2)),
    ]

    # Adjacent literals concatenate into one space-separated stylesheet string.
    extra_css = (
        'h1 {font-size: 1.6em; text-align: left} '
        'h2 {font-size: 1em; font-style: italic; font-weight: normal} '
        'h3 {font-size: 1.3em;text-align: left} '
        'h4, h5, h6, a {font-size: 1em;text-align: left} '
        '.hcf-caption {font-size: 1em;text-align: left; font-style: italic} '
        '.hcf-location-mark {font-style: italic}'
    )

    keep_only_tags = [
        dict(name='div', attrs={'class': ['hcf-column hcf-column1 hcf-teasercontainer hcf-maincol']}),
        dict(name='div', attrs={'id': ['contentMain']}),
    ]

    remove_tags = [
        dict(name='div', attrs={'class': ['hcf-link-block hcf-faq-open', 'hcf-article-related']}),
    ]

    feeds = [
        (u'Schlagzeilen', u'http://www.wiwo.de/contentexport/feed/rss/schlagzeilen'),
        (u'Exklusiv', u'http://www.wiwo.de/contentexport/feed/rss/exklusiv'),
        # (u'Themen', u'http://www.wiwo.de/contentexport/feed/rss/themen'), # AGE no print version
        (u'Unternehmen', u'http://www.wiwo.de/contentexport/feed/rss/unternehmen'),
        (u'Finanzen', u'http://www.wiwo.de/contentexport/feed/rss/finanzen'),
        (u'Politik', u'http://www.wiwo.de/contentexport/feed/rss/politik'),
        (u'Erfolg', u'http://www.wiwo.de/contentexport/feed/rss/erfolg'),
        (u'Technologie', u'http://www.wiwo.de/contentexport/feed/rss/technologie'),
        # (u'Green-WiWo', u'http://green.wiwo.de/feed/rss/') # AGE no print version
    ]

    def print_version(self, url):
        # Insert '/v_detail_tab_print/' in front of the last path segment of
        # ``url``. rpartition() is used so that a URL without any '/' simply
        # gets the marker prefixed to it.
        base, _sep, article_id = url.rpartition('/')
        return base + '/v_detail_tab_print/' + article_id

View File

@ -112,7 +112,7 @@ class WallStreetJournal(BasicNewsRecipe):
if date is not None:
self.timefmt = ' [%s]'%self.tag_to_string(date)
cov = soup.find('div', attrs={'class':'itpSectionHeaderPdf'})
cov = soup.find('div', attrs={'class':lambda x: x and 'itpSectionHeaderPdf' in x.split()})
if cov is not None:
a = cov.find('a', href=True)
if a is not None:

View File

@ -13,14 +13,14 @@ msgstr ""
"Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
"devel@lists.alioth.debian.org>\n"
"POT-Creation-Date: 2011-11-25 14:01+0000\n"
"PO-Revision-Date: 2013-03-23 10:17+0000\n"
"PO-Revision-Date: 2013-05-21 06:13+0000\n"
"Last-Translator: Глория Хрусталёва <gloriya@hushmail.com>\n"
"Language-Team: Russian <debian-l10n-russian@lists.debian.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"X-Launchpad-Export-Date: 2013-03-24 04:45+0000\n"
"X-Generator: Launchpad (build 16540)\n"
"X-Launchpad-Export-Date: 2013-05-22 04:38+0000\n"
"X-Generator: Launchpad (build 16626)\n"
"Language: ru\n"
#. name for aaa
@ -5361,7 +5361,7 @@ msgstr ""
#. name for coa
msgid "Malay; Cocos Islands"
msgstr ""
msgstr "Малайский; Кокосовые острова"
#. name for cob
msgid "Chicomuceltec"

View File

@ -30,14 +30,14 @@ msgstr ""
"Report-Msgid-Bugs-To: Debian iso-codes team <pkg-isocodes-"
"devel@lists.alioth.debian.org>\n"
"POT-Creation-Date: 2011-11-25 14:01+0000\n"
"PO-Revision-Date: 2013-05-13 05:58+0000\n"
"PO-Revision-Date: 2013-05-19 09:23+0000\n"
"Last-Translator: Merarom <Unknown>\n"
"Language-Team: Swedish <sv@li.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"X-Launchpad-Export-Date: 2013-05-14 05:30+0000\n"
"X-Generator: Launchpad (build 16617)\n"
"X-Launchpad-Export-Date: 2013-05-20 05:34+0000\n"
"X-Generator: Launchpad (build 16626)\n"
"Language: sv\n"
#. name for aaa
@ -4582,35 +4582,35 @@ msgstr ""
#. name for bzl
msgid "Boano (Sulawesi)"
msgstr ""
msgstr "Boano (Sulawesi/Celebes)"
#. name for bzm
msgid "Bolondo"
msgstr ""
msgstr "Bolondo"
#. name for bzn
msgid "Boano (Maluku)"
msgstr ""
msgstr "Boano (Maluku)"
#. name for bzo
msgid "Bozaba"
msgstr ""
msgstr "Bozaba"
#. name for bzp
msgid "Kemberano"
msgstr ""
msgstr "Kemberano"
#. name for bzq
msgid "Buli (Indonesia)"
msgstr ""
msgstr "Buli (Indonesien)"
#. name for bzr
msgid "Biri"
msgstr ""
msgstr "Biri"
#. name for bzs
msgid "Brazilian Sign Language"
msgstr ""
msgstr "Brasilianskt teckenspråk"
#. name for bzt
msgid "Brithenig"
@ -4618,39 +4618,39 @@ msgstr ""
#. name for bzu
msgid "Burmeso"
msgstr ""
msgstr "Burmeso"
#. name for bzv
msgid "Bebe"
msgstr ""
msgstr "Bebe"
#. name for bzw
msgid "Basa (Nigeria)"
msgstr ""
msgstr "Basa (Nigeria)"
#. name for bzx
msgid "Bozo; Kɛlɛngaxo"
msgstr ""
msgstr "Bozo; (Mali)"
#. name for bzy
msgid "Obanliku"
msgstr ""
msgstr "Obanliku"
#. name for bzz
msgid "Evant"
msgstr ""
msgstr "Evant"
#. name for caa
msgid "Chortí"
msgstr ""
msgstr "Chortí"
#. name for cab
msgid "Garifuna"
msgstr ""
msgstr "Garifuna"
#. name for cac
msgid "Chuj"
msgstr ""
msgstr "Chuj"
#. name for cad
msgid "Caddo"
@ -4658,59 +4658,59 @@ msgstr "Caddo"
#. name for cae
msgid "Lehar"
msgstr ""
msgstr "Lehar"
#. name for caf
msgid "Carrier; Southern"
msgstr ""
msgstr "Carrier; södra"
#. name for cag
msgid "Nivaclé"
msgstr ""
msgstr "Nivaclé"
#. name for cah
msgid "Cahuarano"
msgstr ""
msgstr "Cahuarano; Peru"
#. name for caj
msgid "Chané"
msgstr ""
msgstr "Chané"
#. name for cak
msgid "Kaqchikel"
msgstr ""
msgstr "Kaqchikel"
#. name for cal
msgid "Carolinian"
msgstr ""
msgstr "Carolinian"
#. name for cam
msgid "Cemuhî"
msgstr ""
msgstr "Cemuhî"
#. name for can
msgid "Chambri"
msgstr ""
msgstr "Chambri"
#. name for cao
msgid "Chácobo"
msgstr ""
msgstr "Chácobo"
#. name for cap
msgid "Chipaya"
msgstr ""
msgstr "Chipaya"
#. name for caq
msgid "Nicobarese; Car"
msgstr ""
msgstr "Nicobarese; Car"
#. name for car
msgid "Carib; Galibi"
msgstr ""
msgstr "Carib; Galibi"
#. name for cas
msgid "Tsimané"
msgstr ""
msgstr "Tsimané"
#. name for cat
msgid "Catalan"
@ -4718,15 +4718,15 @@ msgstr "Katalanska"
#. name for cav
msgid "Cavineña"
msgstr ""
msgstr "Cavineña"
#. name for caw
msgid "Callawalla"
msgstr ""
msgstr "Callawalla; Bolivia"
#. name for cax
msgid "Chiquitano"
msgstr ""
msgstr "Chiquitano; Bolivia"
#. name for cay
msgid "Cayuga"
@ -4734,115 +4734,115 @@ msgstr ""
#. name for caz
msgid "Canichana"
msgstr ""
msgstr "Canichana"
#. name for cbb
msgid "Cabiyarí"
msgstr ""
msgstr "Cabiyarí"
#. name for cbc
msgid "Carapana"
msgstr ""
msgstr "Carapana; Colombia & Brasilien"
#. name for cbd
msgid "Carijona"
msgstr ""
msgstr "Carijona"
#. name for cbe
msgid "Chipiajes"
msgstr ""
msgstr "Chipiajes"
#. name for cbg
msgid "Chimila"
msgstr ""
msgstr "Chimila"
#. name for cbh
msgid "Cagua"
msgstr ""
msgstr "Cagua;Venezuela"
#. name for cbi
msgid "Chachi"
msgstr ""
msgstr "Chachi; Ecuador"
#. name for cbj
msgid "Ede Cabe"
msgstr ""
msgstr "Ede Cabe"
#. name for cbk
msgid "Chavacano"
msgstr ""
msgstr "Chavacano; Filippinerna"
#. name for cbl
msgid "Chin; Bualkhaw"
msgstr ""
msgstr "Chin; Bualkhaw"
#. name for cbn
msgid "Nyahkur"
msgstr ""
msgstr "Nyahkur; Thailand"
#. name for cbo
msgid "Izora"
msgstr ""
msgstr "Izora"
#. name for cbr
msgid "Cashibo-Cacataibo"
msgstr ""
msgstr "Cashibo-Cacataibo;Peru"
#. name for cbs
msgid "Cashinahua"
msgstr ""
msgstr "Cashinahua;Peru"
#. name for cbt
msgid "Chayahuita"
msgstr ""
msgstr "Chayahuita;Peru"
#. name for cbu
msgid "Candoshi-Shapra"
msgstr ""
msgstr "Candoshi-Shapra;Peru"
#. name for cbv
msgid "Cacua"
msgstr ""
msgstr "Cacua;Colombia"
#. name for cbw
msgid "Kinabalian"
msgstr ""
msgstr "Kinabalian;sydöstra Filippinerna"
#. name for cby
msgid "Carabayo"
msgstr ""
msgstr "Carabayo;Colombia"
#. name for cca
msgid "Cauca"
msgstr ""
msgstr "Cauca;Colombia & Panama"
#. name for ccc
msgid "Chamicuro"
msgstr ""
msgstr "Chamicuro;Peru"
#. name for ccd
msgid "Creole; Cafundo"
msgstr ""
msgstr "Creole; Cafundo; Brasilien"
#. name for cce
msgid "Chopi"
msgstr ""
msgstr "Chopi;Moçambique"
#. name for ccg
msgid "Daka; Samba"
msgstr ""
msgstr "Daka; Samba, Nigeria"
#. name for cch
msgid "Atsam"
msgstr ""
msgstr "Atsam"
#. name for ccj
msgid "Kasanga"
msgstr ""
msgstr "Kasanga"
#. name for ccl
msgid "Cutchi-Swahili"
msgstr ""
msgstr "Cutchi-Swahili"
#. name for ccm
msgid "Creole Malay; Malaccan"
@ -4850,75 +4850,75 @@ msgstr ""
#. name for cco
msgid "Chinantec; Comaltepec"
msgstr ""
msgstr "Chinantec; Comaltepec"
#. name for ccp
msgid "Chakma"
msgstr ""
msgstr "Chakma"
#. name for ccq
msgid "Chaungtha"
msgstr ""
msgstr "Chaungtha"
#. name for ccr
msgid "Cacaopera"
msgstr ""
msgstr "Cacaopera"
#. name for cda
msgid "Choni"
msgstr ""
msgstr "Choni"
#. name for cde
msgid "Chenchu"
msgstr ""
msgstr "Chenchu"
#. name for cdf
msgid "Chiru"
msgstr ""
msgstr "Chiru"
#. name for cdg
msgid "Chamari"
msgstr ""
msgstr "Chamari"
#. name for cdh
msgid "Chambeali"
msgstr ""
msgstr "Chambeali"
#. name for cdi
msgid "Chodri"
msgstr ""
msgstr "Chodri"
#. name for cdj
msgid "Churahi"
msgstr ""
msgstr "Churahi"
#. name for cdm
msgid "Chepang"
msgstr ""
msgstr "Chepang"
#. name for cdn
msgid "Chaudangsi"
msgstr ""
msgstr "Chaudangsi"
#. name for cdo
msgid "Chinese; Min Dong"
msgstr ""
msgstr "Kinesiska; Min Dong"
#. name for cdr
msgid "Cinda-Regi-Tiyal"
msgstr ""
msgstr "Cinda-Regi-Tiyal"
#. name for cds
msgid "Chadian Sign Language"
msgstr ""
msgstr "Chadian teckenspråk"
#. name for cdy
msgid "Chadong"
msgstr ""
msgstr "Chadong"
#. name for cdz
msgid "Koda"
msgstr ""
msgstr "Koda"
#. name for cea
msgid "Chehalis; Lower"
@ -4930,11 +4930,11 @@ msgstr "Cebuano"
#. name for ceg
msgid "Chamacoco"
msgstr ""
msgstr "Chamacoco"
#. name for cen
msgid "Cen"
msgstr ""
msgstr "Cen"
#. name for ces
msgid "Czech"
@ -4942,7 +4942,7 @@ msgstr "Tjeckiska"
#. name for cet
msgid "Centúúm"
msgstr ""
msgstr "Centúúm"
#. name for cfa
msgid "Dijim-Bwilim"
@ -4950,31 +4950,31 @@ msgstr ""
#. name for cfd
msgid "Cara"
msgstr ""
msgstr "Cara"
#. name for cfg
msgid "Como Karim"
msgstr ""
msgstr "Como Karim"
#. name for cfm
msgid "Chin; Falam"
msgstr ""
msgstr "Chin; Falam"
#. name for cga
msgid "Changriwa"
msgstr ""
msgstr "Changriwa"
#. name for cgc
msgid "Kagayanen"
msgstr ""
msgstr "Kagayanen"
#. name for cgg
msgid "Chiga"
msgstr ""
msgstr "Chiga"
#. name for cgk
msgid "Chocangacakha"
msgstr ""
msgstr "Chocangacakha; Bhutan"
#. name for cha
msgid "Chamorro"
@ -4986,11 +4986,11 @@ msgstr "Chibcha"
#. name for chc
msgid "Catawba"
msgstr ""
msgstr "Catawba"
#. name for chd
msgid "Chontal; Highland Oaxaca"
msgstr ""
msgstr "Chontal; Highland Oaxaca; Mexico"
#. name for che
msgid "Chechen"
@ -4998,7 +4998,7 @@ msgstr "Tjetjenska"
#. name for chf
msgid "Chontal; Tabasco"
msgstr ""
msgstr "Chontal; Tabasco"
#. name for chg
msgid "Chagatai"
@ -5006,7 +5006,7 @@ msgstr "Chagatai"
#. name for chh
msgid "Chinook"
msgstr ""
msgstr "Chinook"
#. name for chj
msgid "Chinantec; Ojitlán"

View File

@ -4,7 +4,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = u'calibre'
numeric_version = (0, 9, 31)
numeric_version = (0, 9, 32)
__version__ = u'.'.join(map(unicode, numeric_version))
__author__ = u"Kovid Goyal <kovid@kovidgoyal.net>"

View File

@ -320,7 +320,7 @@ class ITUNES(DriverBase):
self.verbose = self.settings().extra_customization[self.DEBUG_LOGGING]
if self.verbose:
logger().info("%s.__init__():" % self.__class__.__name__)
logger().info(" Debug logging enabled in iTunes plugin settings")
logger().info(" Debug logging enabled")
@property
def cache_dir(self):
@ -1288,7 +1288,7 @@ class ITUNES(DriverBase):
logger().error(" failed to add '%s' to Device|Books" % metadata.title)
raise UserFeedback("Unable to add '%s' in direct connect mode" % metadata.title,
details=None, level=UserFeedback.ERROR)
self._wait_for_writable_metadata(added)
#self._wait_for_writable_metadata(added)
return added
elif iswindows:
@ -1471,6 +1471,7 @@ class ITUNES(DriverBase):
if self.verbose:
logger().info(" %s._cover_to_thumb()" % self.__class__.__name__)
#logger().info("db_added: %s lb_added: %s" % (db_added, lb_added))
thumb = None
if metadata.cover:
@ -1514,13 +1515,13 @@ class ITUNES(DriverBase):
'''
if lb_added:
delay = 2.0
self._wait_for_writable_metadata(db_added, delay=delay)
# Wait for updatable artwork
attempts = 9
while attempts:
try:
lb_added.artworks[1].data_.set(cover_data)
break
except:
attempts -= 1
time.sleep(delay)
@ -3229,6 +3230,11 @@ class ITUNES(DriverBase):
if self.verbose:
logger().info(" %s._wait_for_writable_metadata()" % self.__class__.__name__)
if not db_added:
if self.verbose:
logger().info("called from %s() with null db_added" % sys._getframe(1).f_code.co_name)
return
attempts = 9
while attempts:
try:

View File

@ -279,11 +279,11 @@ class POCKETBOOK602(USBMS):
class POCKETBOOK622(POCKETBOOK602):
name = 'PocketBook 622 Device Interface'
description = _('Communicate with the PocketBook 622 reader.')
description = _('Communicate with the PocketBook 622 and 623 readers.')
EBOOK_DIR_MAIN = ''
VENDOR_ID = [0x0489]
PRODUCT_ID = [0xe107]
PRODUCT_ID = [0xe107, 0xcff1]
BCD = [0x0326]
VENDOR_NAME = 'LINUX'

View File

@ -224,16 +224,19 @@ class libiMobileDevice():
def copy_to_iDevice(self, src, dst):
'''
High-level convenience method to copy src on local filesystem to
High-level convenience method to copy src from local filesystem to
dst on iDevice.
Assumed to be a binary file (epub, sqlite, etc)
src: file on local filesystem
dst: file to be created on iOS filesystem
'''
self._log_location("src='%s', dst='%s'" % (src, dst))
with open(src) as f:
self._log_location("src=%s, dst=%s" % (repr(src), repr(dst)))
mode = 'rb'
with open(src, mode) as f:
content = bytearray(f.read())
mode = 'wb'
handle = self._afc_file_open(dst, mode=mode)
handle = self._afc_file_open(str(dst), mode=mode)
if handle is not None:
success = self._afc_file_write(handle, content, mode=mode)
if self.verbose:
@ -533,7 +536,7 @@ class libiMobileDevice():
else:
if self.verbose:
self.log(" could not open file")
raise libiMobileDeviceIOException("could not open file '%s' for reading" % path)
raise libiMobileDeviceIOException("could not open file %s for reading" % repr(path))
return data
@ -800,7 +803,7 @@ class libiMobileDevice():
error: (afc_error_t) AFC_E_SUCCESS (0) on success or AFC_E_* error value
'''
self._log_location("'%s', mode='%s'" % (filename, mode))
self._log_location("%s, mode='%s'" % (repr(filename), mode))
handle = c_ulonglong(0)
@ -1682,6 +1685,18 @@ class libiMobileDevice():
raise libiMobileDeviceException(error_description)
# ~~~ logging ~~~
def _log_diagnostic(self, msg=None):
'''
Print msg to console
'''
if not self.verbose:
return
if msg:
debug_print(" %s" % msg)
else:
debug_print()
def _log_location(self, *args):
'''
'''

View File

@ -74,7 +74,7 @@ def read_border(parent, dest):
for border in XPath('./w:pBdr')(parent):
for edge in ('left', 'top', 'right', 'bottom'):
for elem in XPath('./w:%s' % edge):
for elem in XPath('./w:%s' % edge)(border):
color = get(elem, 'w:color')
if color is not None:
vals['border_%s_color' % edge] = simple_color(color)
@ -151,8 +151,8 @@ def read_spacing(parent, dest):
l, lr = get(s, 'w:line'), get(s, 'w:lineRule', 'auto')
if l is not None:
lh = simple_float(l, 0.05) if lr in {'exactly', 'atLeast'} else simple_float(l, 1/240.0)
line_height = '%.3g%s' % (lh, 'pt' if lr in {'exactly', 'atLeast'} else '')
lh = simple_float(l, 0.05) if lr in {'exact', 'atLeast'} else simple_float(l, 1/240.0)
line_height = '%.3g%s' % (lh, 'pt' if lr in {'exact', 'atLeast'} else '')
setattr(dest, 'margin_top', padding_top)
setattr(dest, 'margin_bottom', padding_bottom)
@ -189,6 +189,89 @@ def read_numbering(parent, dest):
val = (num_id, lvl) if num_id is not None or lvl is not None else inherit
setattr(dest, 'numbering', val)
class Frame(object):

    '''
    Frame properties of a paragraph, read from a w:framePr element.

    Lengths are stored divided by 20 and are emitted as pt values by
    :meth:`css`.
    '''

    all_attributes = ('drop_cap', 'h', 'w', 'h_anchor', 'h_rule', 'v_anchor', 'wrap',
                      'h_space', 'v_space', 'lines', 'x_align', 'y_align', 'x', 'y')

    def __init__(self, fp):
        def length(attr, fallback):
            # A missing attribute yields None (TypeError), a malformed one
            # raises ValueError; both fall back to the supplied default.
            try:
                return int(get(fp, attr))/20
            except (ValueError, TypeError):
                return fallback

        self.drop_cap = get(fp, 'w:dropCap', 'none')
        self.h = length('w:h', 0)
        self.w = length('w:w', None)
        self.x = length('w:x', 0)
        self.y = length('w:y', 0)

        self.h_anchor = get(fp, 'w:hAnchor', 'page')
        self.h_rule = get(fp, 'w:hRule', 'auto')
        self.v_anchor = get(fp, 'w:vAnchor', 'page')
        self.wrap = get(fp, 'w:wrap', 'around')
        self.x_align = get(fp, 'w:xAlign')
        self.y_align = get(fp, 'w:yAlign')

        self.h_space = length('w:hSpace', 0)
        self.v_space = length('w:vSpace', 0)
        # lines is a plain count, not a length, so it is not scaled
        try:
            self.lines = int(get(fp, 'w:lines'))
        except (ValueError, TypeError):
            self.lines = 1

    def css(self, page):
        ''' Return a dict of CSS properties for the element wrapping this frame '''
        ans = {'overflow': 'hidden'}

        if self.drop_cap in {'drop', 'margin'}:
            ans['float'] = 'left'
            ans['margin'] = '0'
            ans['padding-right'] = '0.2em'
            return ans

        if self.h_rule != 'auto':
            if self.h_rule == 'atLeast':
                ans['min-height'] = '%.3gpt' % self.h
            else:
                ans['height'] = '%.3gpt' % self.h
        if self.w is not None:
            ans['width'] = '%.3gpt' % self.w
        vpad = '%.3gpt' % self.v_space
        ans['padding-top'] = vpad
        ans['padding-bottom'] = vpad
        if self.wrap not in {None, 'none'}:
            hpad = '%.3gpt' % self.h_space
            ans['padding-left'] = hpad
            ans['padding-right'] = hpad
            if self.x_align is not None:
                side = 'right' if self.x_align == 'right' else 'left'
            else:
                # No explicit alignment: float towards the nearer page edge
                side = 'left' if self.x/page.width < 0.5 else 'right'
            ans['float'] = side
        return ans

    def __eq__(self, other):
        return all(getattr(other, name, inherit) == getattr(self, name)
                   for name in self.all_attributes)

    def __ne__(self, other):
        return not self.__eq__(other)
def read_frame(parent, dest):
    ' Set dest.frame from the w:framePr children of parent (the last one wins), or to inherit when there are none '
    frames = [Frame(fp) for fp in XPath('./w:framePr')(parent)]
    dest.frame = frames[-1] if frames else inherit
# }}}
class ParagraphStyle(object):
@ -208,7 +291,7 @@ class ParagraphStyle(object):
# Misc.
'text_indent', 'text_align', 'line_height', 'direction', 'background_color',
'numbering', 'font_family', 'font_size',
'numbering', 'font_family', 'font_size', 'frame',
)
def __init__(self, pPr=None):
@ -225,7 +308,7 @@ class ParagraphStyle(object):
):
setattr(self, p, binary_property(pPr, p))
for x in ('border', 'indent', 'justification', 'spacing', 'direction', 'shd', 'numbering'):
for x in ('border', 'indent', 'justification', 'spacing', 'direction', 'shd', 'numbering', 'frame'):
f = globals()['read_%s' % x]
f(pPr, self)
@ -286,5 +369,3 @@ class ParagraphStyle(object):
return self._css
# TODO: keepNext must be done at markup level

View File

@ -11,7 +11,7 @@ import os, sys, shutil
from lxml import etree
from calibre import walk, guess_type
from calibre.ebooks.metadata import string_to_authors
from calibre.ebooks.metadata import string_to_authors, authors_to_sort_string
from calibre.ebooks.metadata.book.base import Metadata
from calibre.ebooks.docx import InvalidDOCX
from calibre.ebooks.docx.names import DOCUMENT, DOCPROPS, XPath, APPPROPS
@ -49,6 +49,7 @@ def read_doc_props(raw, mi):
aut.extend(string_to_authors(author.text))
if aut:
mi.authors = aut
mi.author_sort = authors_to_sort_string(aut)
desc = XPath('//dc:description')(root)
if desc:
@ -181,7 +182,9 @@ class DOCX(object):
else:
root = fromstring(raw)
for item in root.xpath('//*[local-name()="Relationships"]/*[local-name()="Relationship" and @Type and @Target]'):
target = '/'.join((base, item.get('Target').lstrip('/')))
target = item.get('Target')
if item.get('TargetMode', None) != 'External':
target = '/'.join((base, target.lstrip('/')))
typ = item.get('Type')
Id = item.get('Id')
by_id[Id] = by_type[typ] = target

View File

@ -0,0 +1,62 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
from collections import OrderedDict
from calibre.ebooks.docx.names import get, XPath, descendants
class Note(object):

    ' A single note element; iterating over it yields its descendant w:p elements '

    def __init__(self, parent):
        self.parent = parent
        # Notes without an explicit w:type are treated as 'normal'
        self.type = get(parent, 'w:type', 'normal')

    def __iter__(self):
        return iter(descendants(self.parent, 'w:p'))
class Footnotes(object):

    '''
    Registry of the footnotes and endnotes of a document.

    Call the instance with the parsed footnotes/endnotes roots to load them,
    then use :meth:`get_ref` for every reference encountered in the text.
    Iterating yields ``(anchor, counter_text, note)`` for each referenced
    note, in order of first reference.
    '''

    def __init__(self):
        self.counter = 0
        self.footnotes, self.endnotes = {}, {}
        self.notes = OrderedDict()

    def __call__(self, footnotes, endnotes):
        sources = (
            (footnotes, './w:footnote[@w:id]', self.footnotes),
            (endnotes, './w:endnote[@w:id]', self.endnotes),
        )
        for root, expr, store in sources:
            if root is None:
                continue
            for elem in XPath(expr)(root):
                fid = get(elem, 'w:id')
                if fid:
                    store[fid] = Note(elem)

    def get_ref(self, ref):
        '''
        Return ``(anchor, counter_text)`` for the note that ref points to, or
        ``(None, None)`` if the note is unknown or not of type 'normal'.
        '''
        fid = get(ref, 'w:id')
        if ref.tag.endswith('}footnoteReference'):
            pool = self.footnotes
        else:
            pool = self.endnotes
        note = pool.get(fid, None)
        if note is None or note.type != 'normal':
            return None, None
        self.counter += 1
        label = type('')(self.counter)
        anchor = 'note_%d' % self.counter
        self.notes[anchor] = (label, note)
        return anchor, label

    def __iter__(self):
        for anchor, entry in self.notes.iteritems():
            yield anchor, entry[0], entry[1]

    @property
    def has_notes(self):
        return bool(self.notes)

View File

@ -0,0 +1,205 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
import os
from lxml.html.builder import IMG
from calibre.ebooks.docx.names import XPath, get, barename
from calibre.utils.filenames import ascii_filename
from calibre.utils.imghdr import what
def emu_to_pt(x):
    ' Convert a numeric length in EMUs to points (12700 EMUs per point) '
    emus_per_pt = 12700
    return x / emus_per_pt
def get_image_properties(parent):
    '''
    Read size, visibility and alt text for a drawing.

    Returns ``(css, alt)``: css is a dict that may contain width/height (from
    wp:extent) and display (when wp:docPr is marked hidden); alt is the
    docPr description or None.
    '''
    size = {}
    for extent in XPath('./wp:extent')(parent):
        for prop, attr in (('width', 'cx'), ('height', 'cy')):
            try:
                size[prop] = emu_to_pt(int(extent.get(attr)))
            except (TypeError, ValueError):
                # Missing or malformed value: keep whatever was read before
                pass

    ans = {}
    for prop in ('width', 'height'):
        if size.get(prop) is not None:
            ans[prop] = '%.3gpt' % size[prop]

    alt = None
    for docPr in XPath('./wp:docPr')(parent):
        descr = docPr.get('descr', None)
        alt = descr if descr else alt
        if docPr.get('hidden', None) in {'true', 'on', '1'}:
            ans['display'] = 'none'

    return ans, alt
def get_image_margins(elem):
    '''
    Read the distL/distT/distR/distB attributes of elem and return them as a
    dict of CSS padding properties in points. Attributes that are missing or
    not integers are skipped.
    '''
    ans = {}
    for side, css in (('L', 'left'), ('T', 'top'), ('R', 'right'), ('B', 'bottom')):
        val = elem.get('dist%s' % side, None)
        if val is None:
            continue
        try:
            # Attribute values are strings; they must be converted to a
            # number before the unit conversion, otherwise the division
            # raises TypeError and the margin is silently dropped.
            val = emu_to_pt(int(val))
        except (TypeError, ValueError):
            continue
        ans['padding-%s' % css] = '%.3gpt' % val
    return ans
def get_hpos(anchor, page_width):
    '''
    Return the horizontal position of an anchored drawing as a fraction of
    page_width (0 is the left edge, 1 the right edge). The wp:positionH
    children are consulted first, then wp:simplePos; 0 is returned when
    neither gives a usable position.
    '''
    for ph in XPath('./wp:positionH')(anchor):
        rp = ph.get('relativeFrom', None)
        if rp == 'leftMargin':
            return 0
        if rp == 'rightMargin':
            return 1
        for align in XPath('./wp:align')(ph):
            al = align.text
            if al == 'left':
                return 0
            if al == 'center':
                return 0.5
            if al == 'right':
                return 1
        for po in XPath('./wp:posOffset')(ph):
            try:
                pos = emu_to_pt(int(po.text))
            except (TypeError, ValueError):
                continue
            return pos/page_width

    for sp in XPath('./wp:simplePos')(anchor):
        try:
            # The attribute is a string (or None when absent); convert it to
            # an integer first, otherwise the unit conversion always raises
            # TypeError and simplePos is never honoured.
            x = emu_to_pt(int(sp.get('x', None)))
        except (TypeError, ValueError):
            continue
        return x/page_width

    return 0
class Images(object):

    '''
    Extracts the images referenced by drawings in the document into an
    ``images`` sub-directory of the destination directory and produces the
    corresponding <img> elements.
    '''

    def __init__(self):
        self.rid_map = {}  # relationship id -> name of the image inside the container
        self.used = {}  # relationship id -> file name written to the images directory
        self.names = set()
        self.all_images = set()

    def __call__(self, relationships_by_id):
        self.rid_map = relationships_by_id

    def generate_filename(self, rid, base=None):
        '''
        Write the image with relationship id rid into self.dest_dir (once)
        and return the file name used. base is the preferred file name; when
        not given, the name stored in the container is used.
        '''
        if rid in self.used:
            return self.used[rid]
        raw = self.docx.read(self.rid_map[rid])
        base = base or ascii_filename(self.rid_map[rid].rpartition('/')[-1]).replace(' ', '_')
        stem, dot, suffix = base.rpartition('.')
        if not dot:
            # rpartition() puts the whole string in the last slot when there
            # is no dot; treat it as a name without an extension instead of
            # as an extension without a name.
            stem, suffix = base, ''
        ext = what(None, raw) or suffix or 'jpeg'
        base = (stem or 'image') + '.' + ext
        exists = frozenset(self.used.itervalues())
        c = 1
        while base in exists:
            n, e = base.rpartition('.')[0::2]
            base = '%s-%d.%s' % (n, c, e)
            c += 1
        self.used[rid] = base
        with open(os.path.join(self.dest_dir, base), 'wb') as f:
            f.write(raw)
        self.all_images.add('images/' + base)
        return base

    def pic_to_img(self, pic, alt=None):
        '''
        Return an <img> element for the first embedded image of pic that has
        a known relationship, or None. alt is the fallback alt text, used
        when the picture has no description of its own.
        '''
        name = None
        for pr in XPath('descendant::pic:cNvPr')(pic):
            name = pr.get('name', None)
            if name:
                name = ascii_filename(name).replace(' ', '_')
            # Prefer the picture's own description, but do not discard the
            # caller supplied text when there is none
            alt = pr.get('descr', None) or alt
            for a in XPath('descendant::a:blip[@r:embed]')(pic):
                rid = get(a, 'r:embed')
                if rid in self.rid_map:
                    src = self.generate_filename(rid, name)
                    img = IMG(src='images/%s' % src)
                    if alt:
                        # Elements are not callable, attributes are set with set()
                        img.set('alt', alt)
                    return img

    def drawing_to_html(self, drawing, page):
        ' Yield an <img> element for every picture in the w:drawing element '
        # First process the inline pictures
        for inline in XPath('./wp:inline')(drawing):
            style, alt = get_image_properties(inline)
            for pic in XPath('descendant::pic:pic')(inline):
                ans = self.pic_to_img(pic, alt)
                if ans is not None:
                    if style:
                        ans.set('style', '; '.join('%s: %s' % (k, v) for k, v in style.iteritems()))
                    yield ans

        # Now process the floats
        for anchor in XPath('./wp:anchor')(drawing):
            style, alt = get_image_properties(anchor)
            self.get_float_properties(anchor, style, page)
            for pic in XPath('descendant::pic:pic')(anchor):
                ans = self.pic_to_img(pic, alt)
                if ans is not None:
                    if style:
                        ans.set('style', '; '.join('%s: %s' % (k, v) for k, v in style.iteritems()))
                    yield ans

    def get_float_properties(self, anchor, style, page):
        ' Update style (in place) with the float/margin/padding CSS for an anchored drawing '
        if 'display' not in style:
            style['display'] = 'block'
        padding = get_image_margins(anchor)
        width = float(style.get('width', '100pt')[:-2])

        page_width = page.width - page.margin_left - page.margin_right

        # Position of the centre of the image as a fraction of the text width
        hpos = get_hpos(anchor, page_width) + width/(2*page_width)

        wrap_elem = None
        dofloat = False

        for child in reversed(anchor):
            bt = barename(child.tag)
            if bt in {'wrapNone', 'wrapSquare', 'wrapThrough', 'wrapTight', 'wrapTopAndBottom'}:
                wrap_elem = child
                dofloat = bt not in {'wrapNone', 'wrapTopAndBottom'}
                break

        if wrap_elem is not None:
            padding.update(get_image_margins(wrap_elem))
            wt = wrap_elem.get('wrapText', None)
            hpos = 0 if wt == 'right' else 1 if wt == 'left' else hpos
            if dofloat:
                style['float'] = 'left' if hpos < 0.65 else 'right'
            else:
                ml, mr = (None, None) if hpos < 0.34 else ('auto', None) if hpos > 0.65 else ('auto', 'auto')
                if ml is not None:
                    style['margin-left'] = ml
                if mr is not None:
                    style['margin-right'] = mr

        style.update(padding)

    def to_html(self, elem, page, docx, dest_dir):
        ' Yield the <img> elements for elem, writing the image files below dest_dir/images '
        dest = os.path.join(dest_dir, 'images')
        if not os.path.exists(dest):
            os.mkdir(dest)
        self.dest_dir, self.docx = dest, docx
        if elem.tag.endswith('}drawing'):
            for tag in self.drawing_to_html(elem, page):
                yield tag
        # TODO: Handle w:pict

View File

@ -6,14 +6,23 @@ from __future__ import (unicode_literals, division, absolute_import,
__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
import re
from future_builtins import map
from lxml.etree import XPath as X
from calibre.utils.filenames import ascii_text
DOCUMENT = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument'
DOCPROPS = 'http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties'
APPPROPS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties'
STYLES = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles'
NUMBERING = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/numbering'
FONTS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/fontTable'
IMAGES = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/image'
LINKS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink'
FOOTNOTES = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/footnotes'
ENDNOTES = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/endnotes'
namespaces = {
'mo': 'http://schemas.microsoft.com/office/mac/office/2008/main',
@ -65,7 +74,32 @@ def barename(x):
def XML(x):
return '{%s}%s' % (namespaces['xml'], x)
def get(x, attr, default=None):
ns, name = attr.partition(':')[0::2]
return x.attrib.get('{%s}%s' % (namespaces[ns], name), default)
def expand(name):
    '''
    Expand a prefixed name such as ``w:p`` into its ``{namespace-uri}p`` form
    using the namespaces map. A name without a prefix is returned unchanged.
    '''
    ns, tag = name.partition(':')[0::2]
    if ns and tag:
        tag = '{%s}%s' % (namespaces[ns], tag)
    # partition() leaves an unprefixed name in ns with tag empty; it must be
    # handed back as is rather than looked up as a namespace prefix
    return tag or ns
def get(x, attr, default=None):
    ' Return the value of the (prefixed) attribute attr of element x, or default if it is not set '
    qualified = expand(attr)
    return x.attrib.get(qualified, default)
def ancestor(elem, name):
    ' Return the nearest ancestor of elem whose tag is the (prefixed) name, or None '
    wanted = expand(name)
    node = elem.getparent() if elem is not None else None
    while node is not None:
        if getattr(node, 'tag', None) == wanted:
            return node
        node = node.getparent()
    return None
def generate_anchor(name, existing):
    '''
    Build an id of the form ``id_<sanitized name>`` that is not present in
    existing, appending ``_1``, ``_2``, ... until it is unique.
    '''
    cleaned = re.sub(r'[^0-9a-zA-Z_]', '', ascii_text(name))
    base = 'id_' + cleaned.lstrip('_')
    candidate, suffix = base, 0
    while candidate in existing:
        suffix += 1
        candidate = '%s_%d' % (base, suffix)
    return candidate
def children(elem, *args):
    ' Iterate over the children of elem whose tags match any of the (prefixed) names in args '
    tags = map(expand, args)
    return elem.iterchildren(*tags)
def descendants(elem, *args):
    ' Iterate over the descendants of elem whose tags match any of the (prefixed) names in args '
    tags = map(expand, args)
    return elem.iterdescendants(*tags)

View File

@ -13,6 +13,38 @@ from calibre.ebooks.docx.block_styles import ParagraphStyle, inherit
from calibre.ebooks.docx.char_styles import RunStyle
from calibre.ebooks.docx.names import XPath, get
class PageProperties(object):

    '''
    Class representing page level properties (page size/margins) read from
    sectPr elements.

    All values are in points. Values that are missing or malformed in the
    sectPr elements keep their defaults (A4 page, 72pt side margins).
    '''

    def __init__(self, elems=()):
        # These must be unpacked separately: a chained assignment would
        # store the same (width, height) tuple in both attributes
        self.width, self.height = 595.28, 841.89  # pts, A4
        self.margin_left = self.margin_right = 72  # pts
        for sectPr in elems:
            for pgSz in XPath('./w:pgSz')(sectPr):
                w, h = get(pgSz, 'w:w'), get(pgSz, 'w:h')
                try:
                    self.width = int(w)/20
                except (ValueError, TypeError):
                    pass
                try:
                    self.height = int(h)/20
                except (ValueError, TypeError):
                    pass
            for pgMar in XPath('./w:pgMar')(sectPr):
                l, r = get(pgMar, 'w:left'), get(pgMar, 'w:right')
                try:
                    self.margin_left = int(l)/20
                except (ValueError, TypeError):
                    pass
                try:
                    self.margin_right = int(r)/20
                except (ValueError, TypeError):
                    pass
class Style(object):
'''
@ -352,6 +384,19 @@ class Styles(object):
p { text-indent: 1.5em }
ul, ol, p { margin: 0; padding: 0 }
sup.noteref a { text-decoration: none }
h1.notes-header { page-break-before: always }
dl.notes dt { font-size: large }
dl.notes dt a { text-decoration: none }
dl.notes dd { page-break-after: always }
dl.notes dd:last-of-type { page-break-after: avoid }
''') % (self.body_font_family, self.body_font_size)
if ef:
prefix = ef + '\n' + prefix

View File

@ -7,17 +7,24 @@ __license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
import sys, os, re
from collections import OrderedDict
from collections import OrderedDict, defaultdict
from lxml import html
from lxml.html.builder import (
HTML, HEAD, TITLE, BODY, LINK, META, P, SPAN, BR)
HTML, HEAD, TITLE, BODY, LINK, META, P, SPAN, BR, DIV, SUP, A, DT, DL, DD, H1)
from calibre.ebooks.docx.container import DOCX, fromstring
from calibre.ebooks.docx.names import XPath, is_tag, XML, STYLES, NUMBERING, FONTS
from calibre.ebooks.docx.styles import Styles, inherit
from calibre.ebooks.docx.names import (
XPath, is_tag, XML, STYLES, NUMBERING, FONTS, get, generate_anchor,
descendants, ancestor, FOOTNOTES, ENDNOTES)
from calibre.ebooks.docx.styles import Styles, inherit, PageProperties
from calibre.ebooks.docx.numbering import Numbering
from calibre.ebooks.docx.fonts import Fonts
from calibre.ebooks.docx.images import Images
from calibre.ebooks.docx.footnotes import Footnotes
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.oeb.polish.toc import elem_to_toc_text
from calibre.utils.localization import canonicalize_lang, lang_as_iso639_1
class Text:
@ -31,13 +38,15 @@ class Text:
class Convert(object):
def __init__(self, path_or_stream, dest_dir=None, log=None):
def __init__(self, path_or_stream, dest_dir=None, log=None, notes_text=None):
self.docx = DOCX(path_or_stream, log=log)
self.log = self.docx.log
self.notes_text = notes_text or _('Notes')
self.dest_dir = dest_dir or os.getcwdu()
self.mi = self.docx.metadata
self.body = BODY()
self.styles = Styles()
self.images = Images()
self.object_map = OrderedDict()
self.html = HTML(
HEAD(
@ -64,12 +73,37 @@ class Convert(object):
doc = self.docx.document
relationships_by_id, relationships_by_type = self.docx.document_relationships
self.read_styles(relationships_by_type)
self.images(relationships_by_id)
self.layers = OrderedDict()
for wp in XPath('//w:p')(doc):
self.framed = [[]]
self.framed_map = {}
self.anchor_map = {}
self.link_map = defaultdict(list)
self.read_page_properties(doc)
for wp, page_properties in self.page_map.iteritems():
self.current_page = page_properties
p = self.convert_p(wp)
self.body.append(p)
notes_header = None
if self.footnotes.has_notes:
dl = DL()
dl.set('class', 'notes')
self.body.append(H1(self.notes_text))
notes_header = self.body[-1]
notes_header.set('class', 'notes-header')
self.body.append(dl)
for anchor, text, note in self.footnotes:
dl.append(DT('[', A('' + text, href='#back_%s' % anchor, title=text), id=anchor))
dl[-1][0].tail = ']'
dl.append(DD())
for wp in note:
p = self.convert_p(wp)
dl[-1].append(p)
self.resolve_links(relationships_by_id)
# TODO: tables <w:tbl> child of <w:body> (nested tables?)
# TODO: Last section properties <w:sectPr> child of <w:body>
self.styles.cascade(self.layers)
@ -84,6 +118,7 @@ class Convert(object):
lvl = 0
numbered.append((html_obj, num_id, lvl))
self.numbering.apply_markup(numbered, self.body, self.styles, self.object_map)
self.apply_frames()
if len(self.body) > 0:
self.body.text = '\n\t'
@ -100,7 +135,39 @@ class Convert(object):
cls = self.styles.class_name(css)
if cls:
html_obj.set('class', cls)
self.write()
for html_obj, css in self.framed_map.iteritems():
cls = self.styles.class_name(css)
if cls:
html_obj.set('class', cls)
if notes_header is not None:
for h in self.body.iterchildren('h1', 'h2', 'h3'):
notes_header.tag = h.tag
cls = h.get('class', None)
if cls and cls != 'notes-header':
notes_header.set('class', '%s notes-header' % cls)
break
return self.write()
def read_page_properties(self, doc):
    '''
    Build self.page_map, mapping every w:p element of doc to the
    PageProperties of its section. A paragraph containing a w:sectPr closes
    a section; the remaining paragraphs use the w:sectPr child of w:body.
    '''
    self.page_map = OrderedDict()
    pending = []
    for para in descendants(doc, 'w:p'):
        pending.append(para)
        sect = tuple(descendants(para, 'w:sectPr'))
        if not sect:
            continue
        props = PageProperties(sect)
        for x in pending:
            self.page_map[x] = props
        pending = []
    if pending:
        props = PageProperties(XPath('./w:body/w:sectPr')(doc))
        for x in pending:
            self.page_map[x] = props
def read_styles(self, relationships_by_type):
@ -109,16 +176,32 @@ class Convert(object):
if name is None:
cname = self.docx.document_name.split('/')
cname[-1] = defname
if self.docx.exists(cname):
if self.docx.exists('/'.join(cname)):
name = name
return name
nname = get_name(NUMBERING, 'numbering.xml')
sname = get_name(STYLES, 'styles.xml')
fname = get_name(FONTS, 'fontTable.xml')
foname = get_name(FOOTNOTES, 'footnotes.xml')
enname = get_name(ENDNOTES, 'endnotes.xml')
numbering = self.numbering = Numbering()
footnotes = self.footnotes = Footnotes()
fonts = self.fonts = Fonts()
foraw = enraw = None
if foname is not None:
try:
foraw = self.docx.read(foname)
except KeyError:
self.log.warn('Footnotes %s do not exist' % foname)
if enname is not None:
try:
enraw = self.docx.read(enname)
except KeyError:
self.log.warn('Endnotes %s do not exist' % enname)
footnotes(fromstring(foraw) if foraw else None, fromstring(enraw) if enraw else None)
if fname is not None:
embed_relationships = self.docx.get_relationships(fname)[0]
try:
@ -146,7 +229,48 @@ class Convert(object):
self.styles.resolve_numbering(numbering)
def create_toc(self):
    '''
    Create a TOC from the h1-h3 headings in the document.

    Returns the TOC root, or None if there are not enough entries to be
    worth having a TOC.
    '''
    body = self.body
    heading_tags = ('h1', 'h2', 'h3')
    toc_root = TOC()
    finders = [XPath('//%s' % tag) for tag in heading_tags]
    depth = len(finders)

    # Most recently added TOC node at every level, level 0 being the root
    latest = {n: None for n in xrange(1, depth + 1)}
    latest[0] = toc_root

    level_of = {}
    for n, finder in enumerate(finders):
        for heading in frozenset(finder(body)):
            level_of[heading] = n + 1

    self.idcount = 0

    def ensure_id(elem):
        existing = elem.get('id', None)
        if existing:
            return existing
        self.idcount += 1
        fresh = 'toc_id_%d' % self.idcount
        elem.set('id', fresh)
        return fresh

    for heading in body.iterdescendants(*heading_tags):
        level = level_of.get(heading, None)
        if level is None:
            continue
        # Attach to the closest shallower level that currently has a node
        parent_level = level - 1
        while latest[parent_level] is None:
            parent_level -= 1
        level = parent_level + 1
        elem_id = ensure_id(heading)
        text = elem_to_toc_text(heading)
        latest[level] = latest[parent_level].add_item('index.html', elem_id, text)
        # Anything deeper than the new node is no longer a valid parent
        for deeper in xrange(level + 1, depth + 1):
            latest[deeper] = None

    if len(tuple(toc_root.flat())) > 1:
        return toc_root
def write(self):
toc = self.create_toc()
raw = html.tostring(self.html, encoding='utf-8', doctype='<!DOCTYPE html>')
with open(os.path.join(self.dest_dir, 'index.html'), 'wb') as f:
f.write(raw)
@ -155,19 +279,48 @@ class Convert(object):
with open(os.path.join(self.dest_dir, 'docx.css'), 'wb') as f:
f.write(css.encode('utf-8'))
opf = OPFCreator(self.dest_dir, self.mi)
opf.toc = toc
opf.create_manifest_from_files_in([self.dest_dir])
opf.create_spine(['index.html'])
with open(os.path.join(self.dest_dir, 'metadata.opf'), 'wb') as of, open(os.path.join(self.dest_dir, 'toc.ncx'), 'wb') as ncx:
opf.render(of, ncx, 'toc.ncx')
return os.path.join(self.dest_dir, 'metadata.opf')
def convert_p(self, p):
dest = P()
self.object_map[dest] = p
style = self.styles.resolve_paragraph(p)
self.layers[p] = []
for run in XPath('descendant::w:r')(p):
span = self.convert_run(run)
dest.append(span)
self.layers[p].append(run)
self.add_frame(dest, style.frame)
current_anchor = None
current_hyperlink = None
for x in descendants(p, 'w:r', 'w:bookmarkStart', 'w:hyperlink'):
if x.tag.endswith('}r'):
span = self.convert_run(x)
if current_anchor is not None:
(dest if len(dest) == 0 else span).set('id', current_anchor)
current_anchor = None
if current_hyperlink is not None:
hl = ancestor(x, 'w:hyperlink')
if hl is not None:
self.link_map[hl].append(span)
else:
current_hyperlink = None
dest.append(span)
self.layers[p].append(x)
elif x.tag.endswith('}bookmarkStart'):
anchor = get(x, 'w:name')
if anchor and anchor not in self.anchor_map:
self.anchor_map[anchor] = current_anchor = generate_anchor(anchor, frozenset(self.anchor_map.itervalues()))
elif x.tag.endswith('}hyperlink'):
current_hyperlink = x
m = re.match(r'heading\s+(\d+)$', style.style_name or '', re.IGNORECASE)
if m is not None:
n = min(1, max(6, int(m.group(1))))
n = min(6, max(1, int(m.group(1))))
dest.tag = 'h%d' % n
if style.direction == 'rtl':
@ -208,6 +361,31 @@ class Convert(object):
for elem in elems:
p.remove(elem)
wrapper.append(elem)
return wrapper
def resolve_links(self, relationships_by_id):
    '''
    Turn the spans recorded in self.link_map into <a> elements. The target
    is taken from the relationship named by r:id, else from the bookmark
    named by w:anchor; links with neither get href="#" and a warning.
    '''
    for hyperlink, spans in self.link_map.iteritems():
        if len(spans) > 1:
            link = self.wrap_elems(spans, SPAN())
        else:
            link = spans[0]
        link.tag = 'a'

        for source, target in (('w:tgtFrame', 'target'), ('w:tooltip', 'title')):
            value = get(hyperlink, source)
            if value:
                link.set(target, value)

        rid = get(hyperlink, 'r:id')
        if rid and rid in relationships_by_id:
            href = relationships_by_id[rid]
        else:
            anchor = get(hyperlink, 'w:anchor')
            if anchor and anchor in self.anchor_map:
                href = '#' + self.anchor_map[anchor]
            else:
                self.log.warn('Hyperlink with unknown target (%s, %s), ignoring' %
                        (rid, anchor))
                href = '#'
        link.set('href', href)
def convert_run(self, run):
ans = SPAN()
@ -239,6 +417,17 @@ class Convert(object):
br = BR()
text.add_elem(br)
ans.append(text.elem)
elif is_tag(child, 'w:drawing') or is_tag(child, 'w:pict'):
for img in self.images.to_html(child, self.current_page, self.docx, self.dest_dir):
text.add_elem(img)
ans.append(text.elem)
elif is_tag(child, 'w:footnoteReference') or is_tag(child, 'w:endnoteReference'):
anchor, name = self.footnotes.get_ref(child)
if anchor and name:
l = SUP(A(name, href='#' + anchor, title=name), id='back_%s' % anchor)
l.set('class', 'noteref')
text.add_elem(l)
ans.append(text.elem)
if text.buf:
setattr(text.elem, text.attr, ''.join(text.buf))
@ -249,7 +438,39 @@ class Convert(object):
ans.lang = style.lang
return ans
def add_frame(self, html_obj, style):
    '''
    Record html_obj as belonging to a frame with the given style.

    self.framed is a list of runs, each run being a list of
    ``(html_obj, style)`` tuples for consecutive paragraphs that share the
    same frame. An un-framed paragraph (style is inherit) terminates the
    current run.
    '''
    last_run = self.framed[-1]
    if style is inherit:
        if last_run:
            self.framed.append([])
        return

    if last_run:
        if last_run[-1][1] == style:
            last_run.append((html_obj, style))
        else:
            # A different frame starts a new run. The run must be a list
            # containing the tuple, not the bare tuple, or later calls and
            # apply_frames() would index into the wrong structure.
            self.framed.append([(html_obj, style)])
    else:
        last_run.append((html_obj, style))
def apply_frames(self):
    '''
    Wrap every non-empty run in self.framed in a <div> placed where the
    first paragraph of the run was, and register the frame CSS for it.
    '''
    for run in self.framed:
        if not run:
            continue
        style = run[0][1]
        paras = tuple(entry[0] for entry in run)
        first = paras[0]
        parent = first.getparent()
        position = parent.index(first)
        # Building the div moves the paragraphs out of parent
        frame = DIV(*paras)
        parent.insert(position, frame)
        css = style.css(self.page_map[self.object_map[first]])
        self.framed_map[frame] = css
        self.styles.register(css, 'frame')
if __name__ == '__main__':
import shutil
from calibre.utils.logging import default_log
default_log.filter_level = default_log.DEBUG
Convert(sys.argv[-1], log=default_log)()
dest_dir = os.path.join(os.getcwdu(), 'docx_input')
if os.path.exists(dest_dir):
shutil.rmtree(dest_dir)
os.mkdir(dest_dir)
Convert(sys.argv[-1], dest_dir=dest_dir, log=default_log)()

View File

@ -179,7 +179,7 @@ class Metadata(object):
def deepcopy(self):
''' Do not use this method unless you know what you are doing, if you want to create a simple clone of
this object, use :method:`deepcopy_metadata` instead. '''
this object, use :meth:`deepcopy_metadata` instead. '''
m = Metadata(None)
m.__dict__ = copy.deepcopy(self.__dict__)
object.__setattr__(m, '_data', copy.deepcopy(object.__getattribute__(self, '_data')))

View File

@ -21,7 +21,7 @@ from calibre.ebooks.metadata.book.base import Metadata
from calibre.utils.date import parse_date, isoformat
from calibre.utils.localization import get_lang, canonicalize_lang
from calibre import prints, guess_type
from calibre.utils.cleantext import clean_ascii_chars
from calibre.utils.cleantext import clean_ascii_chars, clean_xml_chars
from calibre.utils.config import tweaks
class Resource(object): # {{{
@ -1436,7 +1436,10 @@ def metadata_to_opf(mi, as_string=True, default_lang=None):
attrib['name'] = name
if content:
attrib['content'] = content
elem = metadata.makeelement(tag, attrib=attrib)
try:
elem = metadata.makeelement(tag, attrib=attrib)
except ValueError:
elem = metadata.makeelement(tag, attrib={k:clean_xml_chars(v) for k, v in attrib.iteritems()})
elem.tail = '\n'+(' '*8)
if text:
try:

View File

@ -100,7 +100,7 @@ def update_flow_links(mobi8_reader, resource_map, log):
mr = mobi8_reader
flows = []
img_pattern = re.compile(r'''(<[img\s|image\s][^>]*>)''', re.IGNORECASE)
img_pattern = re.compile(r'''(<[img\s|image\s|svg:image\s][^>]*>)''', re.IGNORECASE)
img_index_pattern = re.compile(r'''['"]kindle:embed:([0-9|A-V]+)[^'"]*['"]''', re.IGNORECASE)
tag_pattern = re.compile(r'''(<[^>]*>)''')
@ -128,7 +128,7 @@ def update_flow_links(mobi8_reader, resource_map, log):
srcpieces = img_pattern.split(flow)
for j in range(1, len(srcpieces), 2):
tag = srcpieces[j]
if tag.startswith('<im'):
if tag.startswith('<im') or tag.startswith('<svg:image'):
for m in img_index_pattern.finditer(tag):
num = int(m.group(1), 32)
href = resource_map[num-1]

View File

@ -228,7 +228,7 @@ class Mobi8Reader(object):
self.flowinfo.append(FlowInfo(None, None, None, None))
svg_tag_pattern = re.compile(br'''(<svg[^>]*>)''', re.IGNORECASE)
image_tag_pattern = re.compile(br'''(<image[^>]*>)''', re.IGNORECASE)
image_tag_pattern = re.compile(br'''(<(?:svg:)?image[^>]*>)''', re.IGNORECASE)
for j in xrange(1, len(self.flows)):
flowpart = self.flows[j]
nstr = '%04d' % j
@ -243,7 +243,7 @@ class Mobi8Reader(object):
dir = None
fname = None
# strip off anything before <svg if inlining
flowpart = flowpart[start:]
flowpart = re.sub(br'(</?)svg:', r'\1', flowpart[start:])
else:
format = 'file'
dir = "images"

View File

@ -11,7 +11,7 @@ import re
from calibre import guess_type
class EntityDeclarationProcessor(object): # {{{
class EntityDeclarationProcessor(object): # {{{
def __init__(self, html):
self.declared_entities = {}
@ -51,7 +51,7 @@ def load_html(path, view, codec='utf-8', mime_type=None,
loading_url = QUrl.fromLocalFile(path)
pre_load_callback(loading_url)
if force_as_html or re.search(r'<[:a-zA-Z0-9-]*svg', html) is None:
if force_as_html or re.search(r'<[a-zA-Z0-9-]+:svg', html) is None:
view.setHtml(html, loading_url)
else:
view.setContent(QByteArray(html.encode(codec)), mime_type,
@ -61,4 +61,3 @@ def load_html(path, view, codec='utf-8', mime_type=None,
if not elem.isNull():
return False
return True

View File

@ -32,7 +32,8 @@ def dynamic_rescale_factor(node):
classes = node.get('class', '').split(' ')
classes = [x.replace('calibre_rescale_', '') for x in classes if
x.startswith('calibre_rescale_')]
if not classes: return None
if not classes:
return None
factor = 1.0
for x in classes:
try:
@ -54,7 +55,8 @@ class KeyMapper(object):
return base
size = float(size)
base = float(base)
if abs(size - base) < 0.1: return 0
if abs(size - base) < 0.1:
return 0
sign = -1 if size < base else 1
endp = 0 if size < base else 36
diff = (abs(base - size) * 3) + ((36 - size) / 100)
@ -110,7 +112,8 @@ class EmbedFontsCSSRules(object):
self.href = None
def __call__(self, oeb):
if not self.body_font_family: return None
if not self.body_font_family:
return None
if not self.href:
iid, href = oeb.manifest.generate(u'page_styles', u'page_styles.css')
rules = [x.cssText for x in self.rules]
@ -228,10 +231,10 @@ class CSSFlattener(object):
bs.append('margin-top: 0pt')
bs.append('margin-bottom: 0pt')
if float(self.context.margin_left) >= 0:
bs.append('margin-left : %gpt'%\
bs.append('margin-left : %gpt'%
float(self.context.margin_left))
if float(self.context.margin_right) >= 0:
bs.append('margin-right : %gpt'%\
bs.append('margin-right : %gpt'%
float(self.context.margin_right))
bs.extend(['padding-left: 0pt', 'padding-right: 0pt'])
if self.page_break_on_body:
@ -277,8 +280,10 @@ class CSSFlattener(object):
for kind in ('margin', 'padding'):
for edge in ('bottom', 'top'):
property = "%s-%s" % (kind, edge)
if property not in cssdict: continue
if '%' in cssdict[property]: continue
if property not in cssdict:
continue
if '%' in cssdict[property]:
continue
value = style[property]
if value == 0:
continue
@ -296,7 +301,7 @@ class CSSFlattener(object):
def flatten_node(self, node, stylizer, names, styles, pseudo_styles, psize, item_id):
if not isinstance(node.tag, basestring) \
or namespace(node.tag) != XHTML_NS:
return
return
tag = barename(node.tag)
style = stylizer.style(node)
cssdict = style.cssdict()
@ -360,12 +365,17 @@ class CSSFlattener(object):
pass
del node.attrib['bgcolor']
if cssdict.get('font-weight', '').lower() == 'medium':
cssdict['font-weight'] = 'normal' # ADE chokes on font-weight medium
cssdict['font-weight'] = 'normal' # ADE chokes on font-weight medium
fsize = font_size
is_drop_cap = (cssdict.get('float', None) == 'left' and 'font-size' in
cssdict and len(node) == 0 and node.text and
len(node.text) == 1)
is_drop_cap = is_drop_cap or (
# The docx input plugin generates drop caps that look like this
len(node) == 1 and not node.text and len(node[0]) == 0 and
node[0].text and not node[0].tail and len(node[0].text) == 1 and
'line-height' in cssdict and 'font-size' in cssdict)
if not self.context.disable_font_rescaling and not is_drop_cap:
_sbase = self.sbase if self.sbase is not None else \
self.context.source.fbase
@ -436,8 +446,7 @@ class CSSFlattener(object):
keep_classes = set()
if cssdict:
items = cssdict.items()
items.sort()
items = sorted(cssdict.items())
css = u';\n'.join(u'%s: %s' % (key, val) for key, val in items)
classes = node.get('class', '').strip() or 'calibre'
klass = ascii_text(STRIPNUM.sub('', classes.split()[0].replace('_', '')))
@ -519,8 +528,7 @@ class CSSFlattener(object):
if float(self.context.margin_bottom) >= 0:
stylizer.page_rule['margin-bottom'] = '%gpt'%\
float(self.context.margin_bottom)
items = stylizer.page_rule.items()
items.sort()
items = sorted(stylizer.page_rule.items())
css = ';\n'.join("%s: %s" % (key, val) for key, val in items)
css = ('@page {\n%s\n}\n'%css) if items else ''
rules = [r.cssText for r in stylizer.font_face_rules +
@ -556,14 +564,14 @@ class CSSFlattener(object):
body = html.find(XHTML('body'))
fsize = self.context.dest.fbase
self.flatten_node(body, stylizer, names, styles, pseudo_styles, fsize, item.id)
items = [(key, val) for (val, key) in styles.items()]
items.sort()
items = sorted([(key, val) for (val, key) in styles.items()])
# :hover must come after link and :active must come after :hover
psels = sorted(pseudo_styles.iterkeys(), key=lambda x :
{'hover':1, 'active':2}.get(x, 0))
for psel in psels:
styles = pseudo_styles[psel]
if not styles: continue
if not styles:
continue
x = sorted(((k+':'+psel, v) for v, k in styles.iteritems()))
items.extend(x)

View File

@ -375,6 +375,8 @@ class FlowSplitter(object):
for img in root.xpath('//h:img', namespaces=NAMESPACES):
if img.get('style', '') != 'display:none':
return False
if root.xpath('//*[local-name() = "svg"]'):
return False
return True
def split_text(self, text, root, size):

View File

@ -22,7 +22,7 @@ from calibre.gui2 import (gprefs, warning_dialog, Dispatcher, error_dialog,
from calibre.library.database2 import LibraryDatabase2
from calibre.gui2.actions import InterfaceAction
class LibraryUsageStats(object): # {{{
class LibraryUsageStats(object): # {{{
def __init__(self):
self.stats = {}
@ -92,7 +92,7 @@ class LibraryUsageStats(object): # {{{
self.write_stats()
# }}}
class MovedDialog(QDialog): # {{{
class MovedDialog(QDialog): # {{{
def __init__(self, stats, location, parent=None):
QDialog.__init__(self, parent)
@ -161,13 +161,15 @@ class ChooseLibraryAction(InterfaceAction):
def genesis(self):
self.base_text = _('%d books')
self.count_changed(0)
self.qaction.triggered.connect(self.choose_library,
type=Qt.QueuedConnection)
self.action_choose = self.menuless_qaction
self.stats = LibraryUsageStats()
self.popup_type = (QToolButton.InstantPopup if len(self.stats.stats) > 1 else
QToolButton.MenuButtonPopup)
if len(self.stats.stats) > 1:
self.action_choose.triggered.connect(self.choose_library)
else:
self.qaction.triggered.connect(self.choose_library)
self.choose_menu = self.qaction.menu()
@ -200,7 +202,6 @@ class ChooseLibraryAction(InterfaceAction):
type=Qt.QueuedConnection)
self.choose_menu.addAction(ac)
self.rename_separator = self.choose_menu.addSeparator()
self.maintenance_menu = QMenu(_('Library Maintenance'))
@ -477,19 +478,20 @@ class ChooseLibraryAction(InterfaceAction):
else:
return
#from calibre.utils.mem import memory
#import weakref
#from PyQt4.Qt import QTimer
#self.dbref = weakref.ref(self.gui.library_view.model().db)
#self.before_mem = memory()/1024**2
# from calibre.utils.mem import memory
# import weakref
# from PyQt4.Qt import QTimer
# self.dbref = weakref.ref(self.gui.library_view.model().db)
# self.before_mem = memory()/1024**2
self.gui.library_moved(loc, allow_rebuild=True)
#QTimer.singleShot(5000, self.debug_leak)
# QTimer.singleShot(5000, self.debug_leak)
def debug_leak(self):
import gc
from calibre.utils.mem import memory
ref = self.dbref
for i in xrange(3): gc.collect()
for i in xrange(3):
gc.collect()
if ref() is not None:
print 'DB object alive:', ref()
for r in gc.get_referrers(ref())[:10]:
@ -500,7 +502,6 @@ class ChooseLibraryAction(InterfaceAction):
print
self.dbref = self.before_mem = None
def qs_requested(self, idx, *args):
self.switch_requested(self.qs_locations[idx])
@ -546,3 +547,4 @@ class ChooseLibraryAction(InterfaceAction):
return False
return True

View File

@ -907,7 +907,7 @@ class BooksModel(QAbstractTableModel): # {{{
if ht == 'timestamp': # change help text because users know this field as 'date'
ht = 'date'
if self.db.field_metadata[self.column_map[section]]['is_category']:
is_cat = '.\n\n' + _('Click in this column and press Q to to Quickview books with the same %s' % ht)
is_cat = '.\n\n' + _('Click in this column and press Q to Quickview books with the same %s' % ht)
else:
is_cat = ''
return QVariant(_('The lookup/search name is "{0}"{1}').format(ht, is_cat))
@ -1029,7 +1029,7 @@ class BooksModel(QAbstractTableModel): # {{{
return False
val = (int(value.toInt()[0]) if column == 'rating' else
value.toDateTime() if column in ('timestamp', 'pubdate')
else unicode(value.toString()).strip())
else re.sub(ur'\s', u' ', unicode(value.toString()).strip()))
id = self.db.id(row)
books_to_refresh = set([id])
if column == 'rating':

View File

@ -45,6 +45,9 @@ def save_dialog(parent, title, msg, det_msg=''):
d.setStandardButtons(QMessageBox.Yes | QMessageBox.No | QMessageBox.Cancel)
return d.exec_()
def clean_text(x):
return re.sub(r'\s', ' ', x.strip())
'''
The interface common to all widgets used to set basic metadata
class BasicMetadataWidget(object):
@ -117,7 +120,7 @@ class TitleEdit(EnLineEdit):
def current_val(self):
def fget(self):
title = unicode(self.text()).strip()
title = clean_text(unicode(self.text()))
if not title:
title = self.get_default()
return title
@ -289,7 +292,7 @@ class AuthorsEdit(EditWithComplete):
def current_val(self):
def fget(self):
au = unicode(self.text()).strip()
au = clean_text(unicode(self.text()))
if not au:
au = self.get_default()
return string_to_authors(au)
@ -352,7 +355,7 @@ class AuthorSortEdit(EnLineEdit):
def current_val(self):
def fget(self):
return unicode(self.text()).strip()
return clean_text(unicode(self.text()))
def fset(self, val):
if not val:
@ -472,7 +475,7 @@ class SeriesEdit(EditWithComplete):
def current_val(self):
def fget(self):
return unicode(self.currentText()).strip()
return clean_text(unicode(self.currentText()))
def fset(self, val):
if not val:
@ -1135,7 +1138,7 @@ class TagsEdit(EditWithComplete): # {{{
@dynamic_property
def current_val(self):
def fget(self):
return [x.strip() for x in unicode(self.text()).split(',')]
return [clean_text(x) for x in unicode(self.text()).split(',')]
def fset(self, val):
if not val:
val = []
@ -1237,7 +1240,7 @@ class IdentifiersEdit(QLineEdit): # {{{
def current_val(self):
def fget(self):
raw = unicode(self.text()).strip()
parts = [x.strip() for x in raw.split(',')]
parts = [clean_text(x) for x in raw.split(',')]
ans = {}
for x in parts:
c = x.split(':')
@ -1376,7 +1379,7 @@ class PublisherEdit(EditWithComplete): # {{{
def current_val(self):
def fget(self):
return unicode(self.currentText()).strip()
return clean_text(unicode(self.currentText()))
def fset(self, val):
if not val:

View File

@ -146,8 +146,12 @@ class CreateVirtualLibrary(QDialog): # {{{
<p>For example you can use a Virtual Library to only show you books with the Tag <i>"Unread"</i>
or only books by <i>"My Favorite Author"</i> or only books in a particular series.</p>
<p>More information and examples are available in the
<a href="http://manual.calibre-ebook.com/virtual_libraries.html">User Manual</a>.</p>
'''))
hl.setWordWrap(True)
hl.setOpenExternalLinks(True)
hl.setFrameStyle(hl.StyledPanel)
gl.addWidget(hl, 0, 3, 4, 1)

View File

@ -41,7 +41,6 @@ class JavaScriptLoader(object):
'hyphenation', 'hyphenator', 'utils', 'cfi', 'indexing', 'paged',
'fs', 'math', 'extract')
def __init__(self, dynamic_coffeescript=False):
self._dynamic_coffeescript = dynamic_coffeescript
if self._dynamic_coffeescript:
@ -68,7 +67,8 @@ class JavaScriptLoader(object):
allow_user_override=False).decode('utf-8')
else:
dynamic = (self._dynamic_coffeescript and
os.path.exists(calibre.__file__))
calibre.__file__ and not calibre.__file__.endswith('.pyo') and
os.path.exists(calibre.__file__))
ans = compiled_coffeescript(src, dynamic=dynamic).decode('utf-8')
self._cache[name] = ans
@ -105,4 +105,3 @@ class JavaScriptLoader(object):
evaljs('\n\n'.join(self._hp_cache.itervalues()))
return lang

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

Some files were not shown because too many files have changed in this diff Show More