Merge from trunk

This commit is contained in:
Charles Haley 2010-06-24 07:41:55 +01:00
commit bb6cacd205
12 changed files with 207 additions and 38 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 315 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 315 B

View File

@ -1,6 +1,6 @@
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
''' '''
lrb.co.uk lrb.co.uk
''' '''
@ -8,17 +8,20 @@ lrb.co.uk
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class LondonReviewOfBooks(BasicNewsRecipe): class LondonReviewOfBooks(BasicNewsRecipe):
title = u'London Review of Books' title = 'London Review of Books (free)'
__author__ = u'Darko Miletic' __author__ = 'Darko Miletic'
description = u'Literary review publishing essay-length book reviews and topical articles on politics, literature, history, philosophy, science and the arts by leading writers and thinkers' description = 'Literary review publishing essay-length book reviews and topical articles on politics, literature, history, philosophy, science and the arts by leading writers and thinkers'
category = 'news, literature, England' category = 'news, literature, UK'
publisher = 'London Review of Books' publisher = 'LRB ltd.'
oldest_article = 7 oldest_article = 15
max_articles_per_feed = 100 max_articles_per_feed = 100
language = 'en_GB' language = 'en_GB'
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
encoding = 'utf-8' encoding = 'utf-8'
publication_type = 'magazine'
masthead_url = 'http://www.lrb.co.uk/assets/images/lrb_logo_big.gif'
extra_css = ' body{font-family: Georgia,Palatino,"Palatino Linotype",serif} '
conversion_options = { conversion_options = {
'comments' : description 'comments' : description
@ -27,13 +30,16 @@ class LondonReviewOfBooks(BasicNewsRecipe):
,'publisher' : publisher ,'publisher' : publisher
} }
keep_only_tags = [dict(name='div' , attrs={'id' :'main'})] keep_only_tags = [dict(attrs={'class':['article-body indent','letters','article-list']})]
remove_tags = [ remove_attributes = ['width','height']
dict(name='div' , attrs={'class':['pagetools','issue-nav-controls','nocss']})
,dict(name='div' , attrs={'id' :['mainmenu','precontent','otherarticles'] })
,dict(name='span', attrs={'class':['inlineright','article-icons']})
,dict(name='ul' , attrs={'class':'article-controls'})
,dict(name='p' , attrs={'class':'meta-info' })
]
feeds = [(u'London Review of Books', u'http://www.lrb.co.uk/lrbrss.xml')] feeds = [(u'London Review of Books', u'http://www.lrb.co.uk/lrbrss.xml')]
def get_cover_url(self):
    '''
    Return the absolute URL of the current issue's cover image.

    The home page is fetched and searched for ``<p class="cover">``; the
    image nested inside that paragraph's link is used as the cover.

    :return: The cover URL, or None when the home page has no usable
             cover markup (no cover paragraph, no link, no image or no
             ``src`` attribute).
    '''
    soup = self.index_to_soup('http://www.lrb.co.uk/')
    cover_item = soup.find('p', attrs={'class': 'cover'})
    if not cover_item:
        return None
    # Walk the <a>/<img> chain defensively: a site redesign should cost us
    # the cover, not abort the whole download with an AttributeError.
    anchor = cover_item.a
    image = anchor.img if anchor is not None else None
    src = image.get('src') if image is not None else None
    if not src:
        return None
    return 'http://www.lrb.co.uk' + src

View File

@ -0,0 +1,75 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
lrb.co.uk
'''
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
class LondonReviewOfBooksPayed(BasicNewsRecipe):
    '''
    Recipe for the subscription edition of the London Review of Books.

    When credentials are supplied the browser logs in at LOGIN first. The
    article list is built from the issue page that the cover on the home
    page links to.
    '''
    title = 'London Review of Books'
    __author__ = 'Darko Miletic'
    description = 'Subscription content. Literary review publishing essay-length book reviews and topical articles on politics, literature, history, philosophy, science and the arts by leading writers and thinkers'
    category = 'news, literature, UK'
    publisher = 'LRB Ltd.'
    max_articles_per_feed = 100
    language = 'en_GB'
    no_stylesheets = True
    delay = 1
    use_embedded_content = False
    encoding = 'utf-8'
    INDEX = 'http://www.lrb.co.uk'
    LOGIN = INDEX + '/login'
    masthead_url = INDEX + '/assets/images/lrb_logo_big.gif'
    needs_subscription = True
    publication_type = 'magazine'
    extra_css = ' body{font-family: Georgia,Palatino,"Palatino Linotype",serif} '

    def get_browser(self):
        '''
        Return the browser used for downloads, logged in to the site when
        both a username and a password have been configured.
        '''
        # The base implementation is invoked through the class, so the
        # instance has to be passed explicitly.
        br = BasicNewsRecipe.get_browser(self)
        if self.username is not None and self.password is not None:
            br.open(self.LOGIN)
            # The login form is selected by position (second form on the page).
            br.select_form(nr=1)
            br['username'] = self.username
            br['password'] = self.password
            br.submit()
        return br

    def parse_index(self):
        '''
        Build the article list for the current issue.

        Side effect: sets ``self.cover_url`` when the home page exposes a
        cover.

        :return: A one element list ``[(feed_title, articles)]`` where each
                 article is a dict with the keys ``title``, ``date``,
                 ``url`` and ``description``. The feed title falls back to
                 the recipe title when no cover/issue link is found.
        '''
        articles = []
        soup = self.index_to_soup(self.INDEX)
        cover_item = soup.find('p', attrs={'class': 'cover'})
        lrbtitle = self.title
        if cover_item:
            self.cover_url = self.INDEX + cover_item.a.img['src']
            # The cover links to the table of contents of the current issue.
            content = self.INDEX + cover_item.a['href']
            soup2 = self.index_to_soup(content)
            sitem = soup2.find(attrs={'class': 'article-list'})
            lrbtitle = soup2.head.title.string
            # Without an article list there is nothing to iterate over;
            # return the (empty) feed instead of failing on None.
            links = sitem.findAll('a', attrs={'class': 'title'}) if sitem is not None else []
            for item in links:
                description = u''
                title_prefix = u''
                href = item.get('href')
                if href is not None:
                    url = self.INDEX + href
                    title = title_prefix + self.tag_to_string(item)
                    date = strftime(self.timefmt)
                    articles.append({
                         'title': title
                        ,'date': date
                        ,'url': url
                        ,'description': description
                    })
        return [(lrbtitle, articles)]

    conversion_options = {
         'comments': description
        ,'tags': category
        ,'language': language
        ,'publisher': publisher
    }

    # Keep only the article text and the letters section of each page.
    keep_only_tags = [dict(name='div', attrs={'class': ['article-body indent', 'letters']})]
    remove_attributes = ['width', 'height']

View File

@ -172,5 +172,10 @@ class CollectionsBookList(BookList):
For each book in the booklist for the card oncard, remove it from all For each book in the booklist for the card oncard, remove it from all
its current collections, then add it to the collections specified in its current collections, then add it to the collections specified in
device_collections. device_collections.
oncard is None for the main memory, carda for card A, cardb for card B,
etc.
booklist is the object created by the :method:`books` call above.
''' '''
pass pass

View File

@ -107,9 +107,21 @@ class CSSPreProcessor(object):
PAGE_PAT = re.compile(r'@page[^{]*?{[^}]*?}') PAGE_PAT = re.compile(r'@page[^{]*?{[^}]*?}')
def __call__(self, data): def __call__(self, data, add_namespace=False):
from calibre.ebooks.oeb.base import XHTML_CSS_NAMESPACE
data = self.PAGE_PAT.sub('', data) data = self.PAGE_PAT.sub('', data)
if not add_namespace:
return data return data
ans, namespaced = [], False
for line in data.splitlines():
ll = line.lstrip()
if not (namespaced or ll.startswith('@import') or
ll.startswith('@charset')):
ans.append(XHTML_CSS_NAMESPACE.strip())
namespaced = True
ans.append(line)
return u'\n'.join(ans)
class HTMLPreProcessor(object): class HTMLPreProcessor(object):

View File

@ -20,7 +20,7 @@ from itertools import izip
from calibre.customize.conversion import InputFormatPlugin from calibre.customize.conversion import InputFormatPlugin
from calibre.ebooks.chardet import xml_to_unicode from calibre.ebooks.chardet import xml_to_unicode
from calibre.customize.conversion import OptionRecommendation from calibre.customize.conversion import OptionRecommendation
from calibre.constants import islinux, isfreebsd from calibre.constants import islinux, isfreebsd, iswindows
from calibre import unicode_path from calibre import unicode_path
from calibre.utils.localization import get_lang from calibre.utils.localization import get_lang
from calibre.utils.filenames import ascii_filename from calibre.utils.filenames import ascii_filename
@ -32,9 +32,14 @@ class Link(object):
@classmethod @classmethod
def url_to_local_path(cls, url, base): def url_to_local_path(cls, url, base):
path = urlunparse(('', '', url.path, url.params, url.query, '')) path = url.path
isabs = False
if iswindows and path.startswith('/'):
path = path[1:]
isabs = True
path = urlunparse(('', '', path, url.params, url.query, ''))
path = unquote(path) path = unquote(path)
if os.path.isabs(path): if isabs or os.path.isabs(path):
return path return path
return os.path.abspath(os.path.join(base, path)) return os.path.abspath(os.path.join(base, path))
@ -307,6 +312,7 @@ class HTMLInput(InputFormatPlugin):
xpath xpath
from calibre import guess_type from calibre import guess_type
import cssutils import cssutils
self.OEB_STYLES = OEB_STYLES
oeb = create_oebbook(log, None, opts, self, oeb = create_oebbook(log, None, opts, self,
encoding=opts.input_encoding, populate=False) encoding=opts.input_encoding, populate=False)
self.oeb = oeb self.oeb = oeb
@ -371,7 +377,7 @@ class HTMLInput(InputFormatPlugin):
rewrite_links(item.data, partial(self.resource_adder, base=dpath)) rewrite_links(item.data, partial(self.resource_adder, base=dpath))
for item in oeb.manifest.values(): for item in oeb.manifest.values():
if item.media_type in OEB_STYLES: if item.media_type in self.OEB_STYLES:
dpath = None dpath = None
for path, href in self.added_resources.items(): for path, href in self.added_resources.items():
if href == item.href: if href == item.href:
@ -409,12 +415,30 @@ class HTMLInput(InputFormatPlugin):
oeb.container = DirContainer(os.getcwdu(), oeb.log) oeb.container = DirContainer(os.getcwdu(), oeb.log)
return oeb return oeb
def link_to_local_path(self, link_, base=None):
    '''
    Resolve a link found in the input document to a local path.

    :param link_: The link, either unicode or UTF-8 encoded bytes.
    :param base: Passed to ``Link`` as the base for resolving the link;
                 the current working directory is used when it is not
                 given.
    :return: A ``(path, fragment)`` tuple, or ``(None, None)`` when the
             link cannot be decoded, cannot be parsed, is not a local
             resource or is empty.
    '''
    if not isinstance(link_, unicode):
        try:
            # 'strict' is the handler that was meant here; 'error' is not a
            # registered error handler name and only failed by accident.
            link_ = link_.decode('utf-8', 'strict')
        except Exception:
            self.log.warn('Failed to decode link %r. Ignoring'%link_)
            return None, None
    try:
        l = Link(link_, base if base else os.getcwdu())
    except Exception:
        self.log.exception('Failed to process link: %r'%link_)
        return None, None
    if l.path is None:
        # Not a local resource
        return None, None
    # Links use '/' as separator, the filesystem may not.
    link = l.path.replace('/', os.sep).strip()
    frag = l.fragment
    if not link:
        return None, None
    return link, frag
def resource_adder(self, link_, base=None): def resource_adder(self, link_, base=None):
link = self.urlnormalize(link_) link, frag = self.link_to_local_path(link_, base=base)
link, frag = self.urldefrag(link) if link is None:
link = unquote(link).replace('/', os.sep)
if not link.strip():
return link_ return link_
try: try:
if base and not os.path.isabs(link): if base and not os.path.isabs(link):
@ -442,6 +466,9 @@ class HTMLInput(InputFormatPlugin):
item = self.oeb.manifest.add(id, href, media_type) item = self.oeb.manifest.add(id, href, media_type)
item.html_input_href = bhref item.html_input_href = bhref
if guessed in self.OEB_STYLES:
item.override_css_fetch = partial(
self.css_import_handler, os.path.dirname(link))
item.data item.data
self.added_resources[link] = href self.added_resources[link] = href
@ -450,7 +477,17 @@ class HTMLInput(InputFormatPlugin):
nlink = '#'.join((nlink, frag)) nlink = '#'.join((nlink, frag))
return nlink return nlink
def css_import_handler(self, base, href):
    '''
    Fetch the stylesheet referenced by an ``@import`` from the local
    filesystem.

    :param base: Base against which ``href`` is resolved.
    :param href: The target of the ``@import`` rule.
    :return: ``(None, css_text)`` on success, ``(None, None)`` when the
             target is not a readable local file or reading it fails.
             NOTE(review): presumably the (encoding, content) pair that a
             cssutils fetcher is expected to return -- confirm.
    '''
    link, _frag = self.link_to_local_path(href, base=base)
    if link is None or not os.access(link, os.R_OK) or os.path.isdir(link):
        return (None, None)
    try:
        # Close the file deterministically instead of relying on the
        # garbage collector to release the handle.
        with open(link, 'rb') as f:
            raw = f.read().decode('utf-8', 'replace')
        raw = self.oeb.css_preprocessor(raw, add_namespace=True)
    except Exception:
        self.log.exception('Failed to read CSS file: %r'%link)
        return (None, None)
    return (None, raw)

View File

@ -17,6 +17,7 @@ from urlparse import urljoin
from lxml import etree, html from lxml import etree, html
from cssutils import CSSParser from cssutils import CSSParser
from cssutils.css import CSSRule
import calibre import calibre
from calibre.constants import filesystem_encoding from calibre.constants import filesystem_encoding
@ -762,6 +763,7 @@ class Manifest(object):
self.href = self.path = urlnormalize(href) self.href = self.path = urlnormalize(href)
self.media_type = media_type self.media_type = media_type
self.fallback = fallback self.fallback = fallback
self.override_css_fetch = None
self.spine_position = None self.spine_position = None
self.linear = True self.linear = True
if loader is None and data is None: if loader is None and data is None:
@ -982,15 +984,40 @@ class Manifest(object):
def _parse_css(self, data): def _parse_css(self, data):
def get_style_rules_from_import(import_rule):
    # Gather, in document order, the style and @font-face rules of the
    # stylesheet pulled in by import_rule, descending into nested @import
    # rules. An import without a loaded stylesheet contributes nothing.
    collected = []
    if import_rule.styleSheet:
        wanted = (CSSRule.FONT_FACE_RULE, CSSRule.STYLE_RULE)
        for child in import_rule.styleSheet.cssRules:
            if child.type == CSSRule.IMPORT_RULE:
                collected.extend(get_style_rules_from_import(child))
            elif child.type in wanted:
                collected.append(child)
    return collected
self.oeb.log.debug('Parsing', self.href, '...') self.oeb.log.debug('Parsing', self.href, '...')
data = self.oeb.decode(data) data = self.oeb.decode(data)
data = self.oeb.css_preprocessor(data) data = self.oeb.css_preprocessor(data, add_namespace=True)
data = XHTML_CSS_NAMESPACE + data
parser = CSSParser(loglevel=logging.WARNING, parser = CSSParser(loglevel=logging.WARNING,
fetcher=self._fetch_css, fetcher=self.override_css_fetch or self._fetch_css,
log=_css_logger) log=_css_logger)
data = parser.parseString(data, href=self.href) data = parser.parseString(data, href=self.href)
data.namespaces['h'] = XHTML_NS data.namespaces['h'] = XHTML_NS
import_rules = list(data.cssRules.rulesOfType(CSSRule.IMPORT_RULE))
rules_to_append = []
insert_index = None
for r in data.cssRules.rulesOfType(CSSRule.STYLE_RULE):
insert_index = data.cssRules.index(r)
break
for rule in import_rules:
rules_to_append.extend(get_style_rules_from_import(rule))
for r in reversed(rules_to_append):
data.insertRule(r, index=insert_index)
for rule in import_rules:
data.deleteRule(rule)
return data return data
def _fetch_css(self, path): def _fetch_css(self, path):

View File

@ -139,11 +139,18 @@ class EbookIterator(object):
if id != -1: if id != -1:
families = [unicode(f) for f in QFontDatabase.applicationFontFamilies(id)] families = [unicode(f) for f in QFontDatabase.applicationFontFamilies(id)]
if family: if family:
family = family.group(1).strip().replace('"', '') family = family.group(1)
bad_map[family] = families[0] specified_families = [x.strip().replace('"',
if family not in families: '').replace("'", '') for x in family.split(',')]
aliasing_ok = False
for f in specified_families:
bad_map[f] = families[0]
if not aliasing_ok and f in families:
aliasing_ok = True
if not aliasing_ok:
prints('WARNING: Family aliasing not fully supported.') prints('WARNING: Family aliasing not fully supported.')
prints('\tDeclared family: %s not in actual families: %s' prints('\tDeclared family: %r not in actual families: %r'
% (family, families)) % (family, families))
else: else:
prints('Loaded embedded font:', repr(family)) prints('Loaded embedded font:', repr(family))

View File

@ -1240,6 +1240,8 @@ class DeviceMixin(object): # {{{
self.card_b_view.reset() self.card_b_view.reset()
def _upload_collections(self, job, view): def _upload_collections(self, job, view):
if job.failed:
self.device_job_exception(job)
view.reset() view.reset()
def upload_collections(self, booklist, view): def upload_collections(self, booklist, view):

View File

@ -74,5 +74,3 @@ class TagListEditor(QDialog, Ui_TagListEditor):
self.to_delete.append(id) self.to_delete.append(id)
self.available_tags.takeItem(self.available_tags.row(item)) self.available_tags.takeItem(self.available_tags.row(item))
def accept(self):
QDialog.accept(self)

View File

@ -302,7 +302,7 @@ Take your pick:
Why does |app| show only some of my fonts on OS X? Why does |app| show only some of my fonts on OS X?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|app| embeds fonts in ebook files it creates. E-book files support embedding only TrueType (.ttf) fonts. Most fonts on OS X systems are in .dfont format, thus they cannot be embedded. |app| shows only TrueType fonts founf on your system. You can obtain many TrueType fonts on the web. Simply download the .ttf files and add them to the Library/Fonts directory in your home directory. |app| embeds fonts in ebook files it creates. E-book files support embedding only TrueType (.ttf) fonts. Most fonts on OS X systems are in .dfont format, thus they cannot be embedded. |app| shows only TrueType fonts found on your system. You can obtain many TrueType fonts on the web. Simply download the .ttf files and add them to the Library/Fonts directory in your home directory.
|app| is not starting on Windows? |app| is not starting on Windows?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~