Merge from trunk

This commit is contained in:
Charles Haley 2010-10-01 18:21:58 +01:00
commit a5977d49e4
11 changed files with 115 additions and 8 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 696 B

View File

@ -0,0 +1,78 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
'''
sciencenews.org
'''
from calibre.web.feeds.news import BasicNewsRecipe
class ScienceNewsIssue(BasicNewsRecipe):
    '''
    Recipe for the Science News "issues" RSS feed (sciencenews.org).

    The class is configured through class attributes; two hooks are
    overridden: get_cover_url() looks up a cover image on the site home page
    and preprocess_html() turns every <span> into a <div>.
    '''

    # --- Descriptive metadata -------------------------------------------
    title = u'Science News Recent Issues'
    __author__ = u'Darko Miletic, Sujata Raman and Starson17'
    # NOTE: continuation lines are deliberately left unindented so that the
    # text of the string stays exactly as it was.
    description = u'''Science News is an award-winning weekly
newsmagazine covering the most important research in all fields of science.
Its 16 pages each week are packed with short, accurate articles that appeal
to both general readers and scientists. Published since 1922, the magazine
now reaches about 150,000 subscribers and more than 1 million readers.
These are the latest News Items from Science News. This recipe downloads
the last 30 days worth of articles.'''
    category = u'Science, Technology, News'
    publisher = u'Society for Science & the Public'
    language = 'en'

    # --- Download behaviour ---------------------------------------------
    # 30 matches the "last 30 days" promise made in the description above.
    oldest_article = 30
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    timefmt = ' [%A, %d %B, %Y]'
    # Links are followed one level deep; match_regexps below restricts
    # which of them qualify.
    recursions = 1
    remove_attributes = ['style']

    conversion_options = {
        'linearize_tables': True,
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # --- Presentation ---------------------------------------------------
    # Rules are keyed on the class names used by the site's article markup.
    extra_css = '''
.content_description{font-family:georgia ;font-size:x-large; color:#646464 ; font-weight:bold;}
.content_summary{font-family:georgia ;font-size:small ;color:#585858 ; font-weight:bold;}
.content_authors{font-family:helvetica,arial ;font-size: xx-small ;color:#14487E ;}
.content_edition{font-family:helvetica,arial ;font-size: xx-small ;}
.exclusive{color:#FF0000 ;}
.anonymous{color:#14487E ;}
.content_content{font-family:helvetica,arial ;font-size: medium ; color:#000000;}
.description{color:#585858;font-family:helvetica,arial ;font-size: large ;}
.credit{color:#A6A6A6;font-family:helvetica,arial ;font-size: xx-small ;}
'''

    # --- Page clean-up --------------------------------------------------
    keep_only_tags = [dict(name='div', attrs={'id': 'column_action'})]
    remove_tags_after = dict(name='ul', attrs={'id': 'content_functions_bottom'})
    remove_tags = [
        dict(name='ul', attrs={'id': 'content_functions_bottom'}),
        dict(name='div', attrs={'id': ['content_functions_top', 'breadcrumb_content']}),
        dict(name='img', attrs={'class': 'icon'}),
        dict(name='div', attrs={'class': 'embiggen'}),
    ]

    # --- Sources --------------------------------------------------------
    feeds = [
        (u"Science News Current Issues",
         u'http://www.sciencenews.org/view/feed/type/edition/name/issues.rss'),
    ]

    # The dots in the host name are escaped so that they match a literal '.'
    # only; unescaped, each '.' would match any character.
    match_regexps = [
        r'www\.sciencenews\.org/view/feature/id/',
        r'www\.sciencenews\.org/view/generic/id',
    ]

    def get_cover_url(self):
        '''
        Return the URL of the cover image, or None when the home page has no
        <img> element whose alt attribute is "issue".
        '''
        cover_url = None
        index = 'http://www.sciencenews.org/view/home'
        soup = self.index_to_soup(index)
        link_item = soup.find(name='img', alt="issue")
        if link_item:
            # The src attribute is site-relative; the host is prepended and
            # a '.jpg' suffix appended to form the final URL.
            cover_url = 'http://www.sciencenews.org' + link_item['src'] + '.jpg'
        return cover_url

    def preprocess_html(self, soup):
        '''
        Rename every <span> in *soup* to <div> and return the same soup.
        '''
        for tag in soup.findAll(name=['span']):
            tag.name = 'div'
        return soup

View File

@ -255,6 +255,9 @@ class OutputProfile(Plugin):
#: Unsupported unicode characters to be replaced during preprocessing #: Unsupported unicode characters to be replaced during preprocessing
unsupported_unicode_chars = [] unsupported_unicode_chars = []
#: Number of ems that the left margin of a blockquote is rendered as
mobi_ems_per_blockquote = 1.0
@classmethod @classmethod
def tags_to_string(cls, tags): def tags_to_string(cls, tags):
return escape(', '.join(tags)) return escape(', '.join(tags))
@ -564,6 +567,7 @@ class KindleOutput(OutputProfile):
supports_mobi_indexing = True supports_mobi_indexing = True
periodical_date_in_title = False periodical_date_in_title = False
ratings_char = u'\u2605' ratings_char = u'\u2605'
mobi_ems_per_blockquote = 2.0
@classmethod @classmethod
def tags_to_string(cls, tags): def tags_to_string(cls, tags):
@ -582,6 +586,7 @@ class KindleDXOutput(OutputProfile):
comic_screen_size = (741, 1022) comic_screen_size = (741, 1022)
supports_mobi_indexing = True supports_mobi_indexing = True
periodical_date_in_title = False periodical_date_in_title = False
mobi_ems_per_blockquote = 2.0
@classmethod @classmethod
def tags_to_string(cls, tags): def tags_to_string(cls, tags):

View File

@ -117,7 +117,8 @@ class EPUBInput(InputFormatPlugin):
encfile = os.path.abspath(os.path.join('META-INF', 'encryption.xml')) encfile = os.path.abspath(os.path.join('META-INF', 'encryption.xml'))
opf = None opf = None
for f in walk(u'.'): for f in walk(u'.'):
if f.lower().endswith('.opf') and '__MACOSX' not in f: if f.lower().endswith('.opf') and '__MACOSX' not in f and \
not os.path.basename(f).startswith('.'):
opf = os.path.abspath(f) opf = os.path.abspath(f)
break break
path = getattr(stream, 'name', 'stream') path = getattr(stream, 'name', 'stream')

View File

@ -33,7 +33,10 @@ def get_metadata(stream):
le = XPath('descendant::fb2:last-name')(au) le = XPath('descendant::fb2:last-name')(au)
if le: if le:
lname = tostring(le[0]) lname = tostring(le[0])
author += ' '+lname if author:
author += ' '+lname
else:
author = lname
if author: if author:
authors.append(author) authors.append(author)
if len(authors) == 1 and author is not None: if len(authors) == 1 and author is not None:

View File

@ -184,13 +184,14 @@ class MobiMLizer(object):
elif tag in NESTABLE_TAGS and istate.rendered: elif tag in NESTABLE_TAGS and istate.rendered:
para = wrapper = bstate.nested[-1] para = wrapper = bstate.nested[-1]
elif left > 0 and indent >= 0: elif left > 0 and indent >= 0:
ems = self.profile.mobi_ems_per_blockquote
para = wrapper = etree.SubElement(parent, XHTML('blockquote')) para = wrapper = etree.SubElement(parent, XHTML('blockquote'))
para = wrapper para = wrapper
emleft = int(round(left / self.profile.fbase)) - 1 emleft = int(round(left / self.profile.fbase)) - ems
emleft = min((emleft, 10)) emleft = min((emleft, 10))
while emleft > 0: while emleft > 0:
para = etree.SubElement(para, XHTML('blockquote')) para = etree.SubElement(para, XHTML('blockquote'))
emleft -= 1 emleft -= ems
else: else:
para = wrapper = etree.SubElement(parent, XHTML('p')) para = wrapper = etree.SubElement(parent, XHTML('p'))
bstate.inline = bstate.para = para bstate.inline = bstate.para = para

View File

@ -41,6 +41,24 @@ class MOBIOutput(OutputFormatPlugin):
), ),
]) ])
def remove_image_transparencies(self):
    '''
    Re-encode every non-empty PNG and GIF item in the OEB manifest.

    Each such image is passed through save_cover_data_to(); judging by the
    log message below, the purpose is to strip transparency. A failure on
    one image is logged and that image is left untouched, so a single bad
    image does not stop the rest from being processed.
    '''
    from calibre.utils.magick.draw import save_cover_data_to

    for item in self.oeb.manifest:
        if not item.media_type.startswith('image'):
            continue
        raw = item.data
        ext = item.media_type.split('/')[-1].lower()
        if ext not in ('png', 'gif') or not raw:
            continue
        try:
            data = save_cover_data_to(raw, 'img.'+ext, return_data=True)
        except Exception:
            # Deliberately best-effort, but narrowed from a bare ``except:``
            # so that KeyboardInterrupt and SystemExit still propagate.
            self.log.exception('Failed to remove transparency from',
                    item.href)
            data = None
        if data is not None:
            item.data = data
            # NOTE(review): nesting was reconstructed from a view without
            # indentation; confirm that the unload belongs to this branch.
            item.unload_data_from_memory()
def check_for_periodical(self): def check_for_periodical(self):
if self.oeb.metadata.publication_type and \ if self.oeb.metadata.publication_type and \
unicode(self.oeb.metadata.publication_type[0]).startswith('periodical:'): unicode(self.oeb.metadata.publication_type[0]).startswith('periodical:'):
@ -160,6 +178,7 @@ class MOBIOutput(OutputFormatPlugin):
from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer, Unavailable from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer, Unavailable
from calibre.ebooks.oeb.transforms.htmltoc import HTMLTOCAdder from calibre.ebooks.oeb.transforms.htmltoc import HTMLTOCAdder
from calibre.customize.ui import plugin_for_input_format from calibre.customize.ui import plugin_for_input_format
self.remove_image_transparencies()
imagemax = PALM_MAX_IMAGE_SIZE if opts.rescale_images else None imagemax = PALM_MAX_IMAGE_SIZE if opts.rescale_images else None
if not opts.no_inline_toc: if not opts.no_inline_toc:
tocadder = HTMLTOCAdder(title=opts.toc_title) tocadder = HTMLTOCAdder(title=opts.toc_title)

View File

@ -234,7 +234,7 @@ class MobiReader(object):
self.debug = debug self.debug = debug
self.embedded_mi = None self.embedded_mi = None
self.base_css_rules = textwrap.dedent(''' self.base_css_rules = textwrap.dedent('''
blockquote { margin: 0em 0em 0em 1.25em; text-align: justify } blockquote { margin: 0em 0em 0em 2em; text-align: justify }
p { margin: 0em; text-align: justify } p { margin: 0em; text-align: justify }

View File

@ -39,7 +39,7 @@ class RescaleImages(object):
if item.media_type.startswith('image'): if item.media_type.startswith('image'):
ext = item.media_type.split('/')[-1].upper() ext = item.media_type.split('/')[-1].upper()
if ext == 'JPG': ext = 'JPEG' if ext == 'JPG': ext = 'JPEG'
if ext not in ('PNG', 'JPEG'): if ext not in ('PNG', 'JPEG', 'GIF'):
ext = 'JPEG' ext = 'JPEG'
raw = item.data raw = item.data

View File

@ -10,7 +10,7 @@
<x>0</x> <x>0</x>
<y>0</y> <y>0</y>
<width>382</width> <width>382</width>
<height>242</height> <height>265</height>
</rect> </rect>
</property> </property>
<property name="windowTitle"> <property name="windowTitle">

View File

@ -19,7 +19,7 @@
<property name="text"> <property name="text">
<string>Here you can change the metadata calibre uses to update a book when saving to disk or sending to device. <string>Here you can change the metadata calibre uses to update a book when saving to disk or sending to device.
Use this dialog to define a 'plugboard' for a format (or all formats) and a device (or all devices). The plugboard spefies what template is connected to what field. The template is used to compute a value, and that value is assigned to the connected field. Use this dialog to define a 'plugboard' for a format (or all formats) and a device (or all devices). The plugboard specifies what template is connected to what field. The template is used to compute a value, and that value is assigned to the connected field.
Often templates will contain simple references to composite columns, but this is not necessary. You can use any template in a source box that you can use elsewhere in calibre. Often templates will contain simple references to composite columns, but this is not necessary. You can use any template in a source box that you can use elsewhere in calibre.