merge from trunk

This commit is contained in:
Lee 2012-05-24 20:40:14 +08:00
commit b74dbab58e
87 changed files with 37423 additions and 34210 deletions

21
recipes/attac_es.recipe Normal file
View File

@ -0,0 +1,21 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AttacEspanaRecipe(BasicNewsRecipe):
    """News recipe for attac.es, populated from the site's single RSS feed."""

    # -- Authorship / licensing metadata for the recipe itself --------------
    __author__ = u'Marc Busqué'
    __url__ = 'http://www.lamarciana.com'
    __version__ = '1.0'
    __license__ = 'GPL v3'
    __copyright__ = u'2012, Marc Busqué <marc@lamarciana.com>'

    # -- Publication metadata ------------------------------------------------
    title = u'attac.es'
    # Split across adjacent literals for readability only; the concatenated
    # value is identical to the original single-line string.
    description = (
        u'La Asociación por la Tasación de las Transacciones Financieras y por '
        u'la Ayuda a los Ciudadanos (ATTAC) es un movimiento internacional '
        u'altermundialista que promueve el control democrático de los mercados '
        u'financieros y las instituciones encargadas de su control mediante la '
        u'reflexión política y la movilización social.'
    )
    url = 'http://www.attac.es'
    language = 'es'
    # NOTE(review): unlike the other non-ASCII literals in this class, this
    # one carries no u'' prefix -- confirm whether that is intentional.
    tags = 'contrainformación, información alternativa'
    cover_url = u'http://www.attac.es/wp-content/themes/attacweb/images/attaces.jpg'

    # -- Download / clean-up options (semantics defined by BasicNewsRecipe) --
    oldest_article = 7
    remove_empty_feeds = True
    no_stylesheets = True

    # (feed title, feed URL) pairs
    feeds = [(u'Attac', u'http://www.attac.es/feed')]

View File

@ -15,6 +15,7 @@ class BusinessWeek(BasicNewsRecipe):
oldest_article = 7 oldest_article = 7
max_articles_per_feed = 200 max_articles_per_feed = 200
no_stylesheets = True no_stylesheets = True
auto_cleanup = True
encoding = 'utf8' encoding = 'utf8'
use_embedded_content = False use_embedded_content = False
language = 'en' language = 'en'
@ -36,12 +37,12 @@ class BusinessWeek(BasicNewsRecipe):
, 'language' : language , 'language' : language
} }
remove_tags = [ #remove_tags = [
dict(attrs={'class':'inStory'}) #dict(attrs={'class':'inStory'})
,dict(name=['meta','link','iframe','base','embed','object','table','th','tr','td']) #,dict(name=['meta','link','iframe','base','embed','object','table','th','tr','td'])
,dict(attrs={'id':['inset','videoDisplay']}) #,dict(attrs={'id':['inset','videoDisplay']})
] #]
keep_only_tags = [dict(name='div', attrs={'id':['story-body','storyBody']})] #keep_only_tags = [dict(name='div', attrs={'id':['story-body','storyBody']})]
remove_attributes = ['lang'] remove_attributes = ['lang']
match_regexps = [r'http://www.businessweek.com/.*_page_[1-9].*'] match_regexps = [r'http://www.businessweek.com/.*_page_[1-9].*']
@ -100,3 +101,4 @@ class BusinessWeek(BasicNewsRecipe):
tstr = alink.string tstr = alink.string
alink.replaceWith(tstr) alink.replaceWith(tstr)
return soup return soup

View File

@ -34,6 +34,8 @@ from BeautifulSoup import BeautifulSoup
Added new feeds Added new feeds
Updated css Updated css
Changed order of regex to speedup proces Changed order of regex to speedup proces
Version 1.9.3 23-05-2012
Updated Cover image
''' '''
class AdvancedUserRecipe1306097511(BasicNewsRecipe): class AdvancedUserRecipe1306097511(BasicNewsRecipe):
@ -51,7 +53,7 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
no_stylesheets = True no_stylesheets = True
remove_javascript = True remove_javascript = True
remove_empty_feeds = True remove_empty_feeds = True
cover_url = 'http://www.oldreadmetro.com/img/en/metroholland/last/1/small.jpg' cover_url = 'http://www.readmetro.com/en/holland/metro-holland/image/large/last/'
publication_type = 'newspaper' publication_type = 'newspaper'
encoding = 'utf-8' encoding = 'utf-8'
remove_attributes = ['style', 'font', 'width', 'height', 'itemtype', 'itemprop', 'itemscope']#, 'href'] remove_attributes = ['style', 'font', 'width', 'height', 'itemtype', 'itemprop', 'itemscope']#, 'href']

View File

@ -6,7 +6,7 @@ fun! CalibreLog()
" making a release. " making a release.
enew enew
read ! bzr log -l 500 read ! bzr log -l 500
set nomodifiable noswapfile buftype=nofile setl nomodifiable noswapfile buftype=nofile
edit Changelog.yaml edit Changelog.yaml
edit src/calibre/constants.py edit src/calibre/constants.py
endfun endfun

View File

@ -626,7 +626,10 @@ class HTMLPreProcessor(object):
if getattr(self.extra_opts, 'smarten_punctuation', False): if getattr(self.extra_opts, 'smarten_punctuation', False):
html = self.smarten_punctuation(html) html = self.smarten_punctuation(html)
try:
unsupported_unicode_chars = self.extra_opts.output_profile.unsupported_unicode_chars unsupported_unicode_chars = self.extra_opts.output_profile.unsupported_unicode_chars
except AttributeError:
unsupported_unicode_chars = u''
if unsupported_unicode_chars: if unsupported_unicode_chars:
from calibre.utils.localization import get_udc from calibre.utils.localization import get_udc
unihandecoder = get_udc() unihandecoder = get_udc()

View File

@ -120,11 +120,10 @@ class OEB2HTML(object):
el.attrib['id'] = self.get_link_id(page.href, el.attrib['id'])[1:] el.attrib['id'] = self.get_link_id(page.href, el.attrib['id'])[1:]
def get_css(self, oeb_book): def get_css(self, oeb_book):
css = u'' css = b''
for item in oeb_book.manifest: for item in oeb_book.manifest:
if item.media_type == 'text/css': if item.media_type == 'text/css':
css = item.data.cssText css += item.data.cssText + b'\n\n'
break
return css return css
def prepare_string_for_html(self, raw): def prepare_string_for_html(self, raw):

View File

@ -377,6 +377,7 @@ class MOBIHeader(object): # {{{
a('Compression: %s'%self.compression) a('Compression: %s'%self.compression)
a('Unused: %r'%self.unused) a('Unused: %r'%self.unused)
a('Text length: %d'%self.text_length)
a('Number of text records: %d'%self.number_of_text_records) a('Number of text records: %d'%self.number_of_text_records)
a('Text record size: %d'%self.text_record_size) a('Text record size: %d'%self.text_record_size)
a('Encryption: %s'%self.encryption_type) a('Encryption: %s'%self.encryption_type)

View File

@ -411,6 +411,7 @@ class MobiWriter(object):
header_fields['fdst_count'] = 1 # Why not 0? Kindlegen uses 1 header_fields['fdst_count'] = 1 # Why not 0? Kindlegen uses 1
header_fields['flis_record'] = flis_number header_fields['flis_record'] = flis_number
header_fields['fcis_record'] = fcis_number header_fields['fcis_record'] = fcis_number
header_fields['text_length'] = self.text_length
extra_data_flags = 0b1 # Has multibyte overlap bytes extra_data_flags = 0b1 # Has multibyte overlap bytes
if self.primary_index_record_idx is not None: if self.primary_index_record_idx is not None:
extra_data_flags |= 0b10 extra_data_flags |= 0b10

View File

@ -42,7 +42,9 @@ class Tweak(object): # {{{
def __init__(self, name, doc, var_names, defaults, custom): def __init__(self, name, doc, var_names, defaults, custom):
translate = _ translate = _
self.name = translate(name) self.name = translate(name)
self.doc = translate(doc.strip()) self.doc = doc.strip()
if self.doc:
self.doc = translate(self.doc)
self.var_names = var_names self.var_names = var_names
self.default_values = {} self.default_values = {}
for x in var_names: for x in var_names:

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -598,14 +598,16 @@ def educateQuotes(str):
str = re.sub(r"""(?<=\w)"(?=\W)""", r"""&#8221;""", str) str = re.sub(r"""(?<=\w)"(?=\W)""", r"""&#8221;""", str)
str = re.sub(r"""(?<=\w)'(?=\W)""", r"""&#8217;""", str) str = re.sub(r"""(?<=\w)'(?=\W)""", r"""&#8217;""", str)
# The following are commented out as smartypants tokenizes text by
# stripping out html tags. Therefore, there is no guarantee that the
# start-of-line and end-of-line regex operators will match anything
# meaningful
# Special case for Quotes at end of line with a preceeding space (may change just to end of line) # Special case for Quotes at end of line with a preceeding space (may change just to end of line)
str = re.sub(r"""(?<=\s)"$""", r"""&#8221;""", str) #str = re.sub(r"""(?<=\s)"$""", r"""&#8221;""", str)
str = re.sub(r"""(?<=\s)'$""", r"""&#8217;""", str) #str = re.sub(r"""(?<=\s)'$""", r"""&#8217;""", str)
# Special case for Quotes at beginning of line with a space - multiparagraph quoted text: # Special case for Quotes at beginning of line with a space - multiparagraph quoted text:
# This case commented out as it works under the assumption that the regex pattern will always
# evaluate a complete sentence - the tokenize function called earlier in smarty will break up
# the text based on tags, so sentence fragments can be passed to the patterns as well.
#str = re.sub(r"""^"(?=\s)""", r"""&#8220;""", str) #str = re.sub(r"""^"(?=\s)""", r"""&#8220;""", str)
#str = re.sub(r"""^'(?=\s)""", r"""&#8216;""", str) #str = re.sub(r"""^'(?=\s)""", r"""&#8216;""", str)

View File

@ -854,9 +854,24 @@ ol, ul { padding-left: 2em; }
else: else:
css_styles[css2] = [name] css_styles[css2] = [name]
def filter_margins(css2):
    """Yield the ``(property, value)`` pairs of *css2*, normalising margins.

    *css2* is an iterable of 2-item ``(name, value)`` pairs.  It is
    materialised once up front so that one-shot iterables (generators,
    iterators) work as well as tuples and lists; previously the input was
    iterated twice and a one-shot iterable produced no output.

    Behaviour:

    * Any ``margin`` shorthand is moved ahead of every other property
      (the relative order of the remaining properties is preserved), so
      the more specific ``margin-*`` declarations come after it.
    * If all four of ``margin-left``, ``margin-right``, ``margin-top`` and
      ``margin-bottom`` are present, the ``margin`` shorthand is dropped
      entirely.
    """
    pairs = list(css2)
    names = {name for name, _ in pairs}
    sides = {'margin-left', 'margin-right', 'margin-top', 'margin-bottom'}
    # These come from XML and we cannot preserve XML attribute
    # order so we assume that margin is to be overridden See
    # https://bugs.launchpad.net/calibre/+bug/941134 and
    # https://bugs.launchpad.net/calibre/+bug/1002702
    drop_shorthand = sides.issubset(names)
    # list.sort is stable: 'margin' entries float to the front while all
    # other entries keep their original relative order.
    pairs.sort(key=lambda pair: 0 if pair[0] == 'margin' else 1)
    for name, value in pairs:
        if drop_shorthand and name == 'margin':
            continue
        yield name, value
for css2, names in css_styles.iteritems(): for css2, names in css_styles.iteritems():
self.writeout("%s {\n" % ', '.join(names)) self.writeout("%s {\n" % ', '.join(names))
for style, val in css2: for style, val in filter_margins(css2):
self.writeout("\t%s: %s;\n" % (style, val) ) self.writeout("\t%s: %s;\n" % (style, val) )
self.writeout("}\n") self.writeout("}\n")
@ -941,19 +956,7 @@ ol, ul { padding-left: 2em; }
if self.currentstyle is None: # Added by Kovid if self.currentstyle is None: # Added by Kovid
return return
# Added by Kovid
names = {x[1]:x for x in attrs.iterkeys()}
ignore_keys = set()
if ('margin' in names and 'margin-top' in names and 'margin-left' in
names and 'margin-right' in names and 'margin-bottom' in
names):
# These come from XML and we cannot preserve XML attribute order so
# we assume that margin is to be overridden
# See https://bugs.launchpad.net/calibre/+bug/941134
ignore_keys.add(names['margin'])
for key,attr in attrs.items(): for key,attr in attrs.items():
if key not in ignore_keys:
self.styledict[self.currentstyle][key] = attr self.styledict[self.currentstyle][key] = attr