merge from trunk

This commit is contained in:
Lee 2012-05-24 20:40:14 +08:00
commit b74dbab58e
87 changed files with 37423 additions and 34210 deletions

21
recipes/attac_es.recipe Normal file
View File

@ -0,0 +1,21 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AttacEspanaRecipe(BasicNewsRecipe):
    """News recipe that builds a periodical from the attac.es RSS feed."""

    # Recipe authorship and licensing metadata.
    __author__ = u'Marc Busqué'
    __url__ = 'http://www.lamarciana.com'
    __version__ = '1.0'
    __license__ = 'GPL v3'
    __copyright__ = u'2012, Marc Busqué <marc@lamarciana.com>'

    # Publication metadata.
    title = u'attac.es'
    description = u'La Asociación por la Tasación de las Transacciones Financieras y por la Ayuda a los Ciudadanos (ATTAC) es un movimiento internacional altermundialista que promueve el control democrático de los mercados financieros y las instituciones encargadas de su control mediante la reflexión política y la movilización social.'
    url = 'http://www.attac.es'
    language = 'es'
    # Unicode literal, like every other non-ASCII string in this class, so
    # the value is not a raw byte string under Python 2.
    tags = u'contrainformación, información alternativa'

    # Download behaviour.
    # NOTE(review): presumably a maximum article age in days -- confirm
    # against the BasicNewsRecipe documentation.
    oldest_article = 7
    remove_empty_feeds = True
    no_stylesheets = True
    cover_url = u'http://www.attac.es/wp-content/themes/attacweb/images/attaces.jpg'

    # (feed title, feed URL) pairs to fetch.
    feeds = [
        (u'Attac', u'http://www.attac.es/feed'),
    ]

View File

@ -15,6 +15,7 @@ class BusinessWeek(BasicNewsRecipe):
oldest_article = 7
max_articles_per_feed = 200
no_stylesheets = True
auto_cleanup = True
encoding = 'utf8'
use_embedded_content = False
language = 'en'
@ -36,12 +37,12 @@ class BusinessWeek(BasicNewsRecipe):
, 'language' : language
}
remove_tags = [
dict(attrs={'class':'inStory'})
,dict(name=['meta','link','iframe','base','embed','object','table','th','tr','td'])
,dict(attrs={'id':['inset','videoDisplay']})
]
keep_only_tags = [dict(name='div', attrs={'id':['story-body','storyBody']})]
#remove_tags = [
#dict(attrs={'class':'inStory'})
#,dict(name=['meta','link','iframe','base','embed','object','table','th','tr','td'])
#,dict(attrs={'id':['inset','videoDisplay']})
#]
#keep_only_tags = [dict(name='div', attrs={'id':['story-body','storyBody']})]
remove_attributes = ['lang']
match_regexps = [r'http://www.businessweek.com/.*_page_[1-9].*']
@ -100,3 +101,4 @@ class BusinessWeek(BasicNewsRecipe):
tstr = alink.string
alink.replaceWith(tstr)
return soup

View File

@ -34,6 +34,8 @@ from BeautifulSoup import BeautifulSoup
Added new feeds
Updated css
Changed order of regex to speed up processing
Version 1.9.3 23-05-2012
Updated Cover image
'''
class AdvancedUserRecipe1306097511(BasicNewsRecipe):
@ -51,7 +53,7 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
no_stylesheets = True
remove_javascript = True
remove_empty_feeds = True
cover_url = 'http://www.oldreadmetro.com/img/en/metroholland/last/1/small.jpg'
cover_url = 'http://www.readmetro.com/en/holland/metro-holland/image/large/last/'
publication_type = 'newspaper'
encoding = 'utf-8'
remove_attributes = ['style', 'font', 'width', 'height', 'itemtype', 'itemprop', 'itemscope']#, 'href']

View File

@ -6,7 +6,7 @@ fun! CalibreLog()
" making a release.
enew
read ! bzr log -l 500
set nomodifiable noswapfile buftype=nofile
setl nomodifiable noswapfile buftype=nofile
edit Changelog.yaml
edit src/calibre/constants.py
endfun

View File

@ -626,7 +626,10 @@ class HTMLPreProcessor(object):
if getattr(self.extra_opts, 'smarten_punctuation', False):
html = self.smarten_punctuation(html)
unsupported_unicode_chars = self.extra_opts.output_profile.unsupported_unicode_chars
try:
unsupported_unicode_chars = self.extra_opts.output_profile.unsupported_unicode_chars
except AttributeError:
unsupported_unicode_chars = u''
if unsupported_unicode_chars:
from calibre.utils.localization import get_udc
unihandecoder = get_udc()

View File

@ -120,13 +120,12 @@ class OEB2HTML(object):
el.attrib['id'] = self.get_link_id(page.href, el.attrib['id'])[1:]
def get_css(self, oeb_book):
css = u''
css = b''
for item in oeb_book.manifest:
if item.media_type == 'text/css':
css = item.data.cssText
break
css += item.data.cssText + b'\n\n'
return css
def prepare_string_for_html(self, raw):
raw = prepare_string_for_xml(raw)
raw = raw.replace(u'\u00ad', '&shy;')

View File

@ -377,6 +377,7 @@ class MOBIHeader(object): # {{{
a('Compression: %s'%self.compression)
a('Unused: %r'%self.unused)
a('Text length: %d'%self.text_length)
a('Number of text records: %d'%self.number_of_text_records)
a('Text record size: %d'%self.text_record_size)
a('Encryption: %s'%self.encryption_type)

View File

@ -411,6 +411,7 @@ class MobiWriter(object):
header_fields['fdst_count'] = 1 # Why not 0? Kindlegen uses 1
header_fields['flis_record'] = flis_number
header_fields['fcis_record'] = fcis_number
header_fields['text_length'] = self.text_length
extra_data_flags = 0b1 # Has multibyte overlap bytes
if self.primary_index_record_idx is not None:
extra_data_flags |= 0b10

View File

@ -42,7 +42,9 @@ class Tweak(object): # {{{
def __init__(self, name, doc, var_names, defaults, custom):
translate = _
self.name = translate(name)
self.doc = translate(doc.strip())
self.doc = doc.strip()
if self.doc:
self.doc = translate(self.doc)
self.var_names = var_names
self.default_values = {}
for x in var_names:

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -597,15 +597,17 @@ def educateQuotes(str):
str = re.sub(r"""(?<=\W)'(?=\w)""", r"""&#8216;""", str)
str = re.sub(r"""(?<=\w)"(?=\W)""", r"""&#8221;""", str)
str = re.sub(r"""(?<=\w)'(?=\W)""", r"""&#8217;""", str)
# The following are commented out as smartypants tokenizes text by
# stripping out html tags. Therefore, there is no guarantee that the
# start-of-line and end-of-line regex operators will match anything
# meaningful
# Special case for Quotes at end of line with a preceding space (may change just to end of line)
str = re.sub(r"""(?<=\s)"$""", r"""&#8221;""", str)
str = re.sub(r"""(?<=\s)'$""", r"""&#8217;""", str)
#str = re.sub(r"""(?<=\s)"$""", r"""&#8221;""", str)
#str = re.sub(r"""(?<=\s)'$""", r"""&#8217;""", str)
# Special case for Quotes at beginning of line with a space - multiparagraph quoted text:
# This case commented out as it works under the assumption that the regex pattern will always
# evaluate a complete sentence - the tokenize function called earlier in smarty will break up
# the text based on tags, so sentence fragments can be passed to the patterns as well.
#str = re.sub(r"""^"(?=\s)""", r"""&#8220;""", str)
#str = re.sub(r"""^'(?=\s)""", r"""&#8216;""", str)

View File

@ -854,9 +854,24 @@ ol, ul { padding-left: 2em; }
else:
css_styles[css2] = [name]
def filter_margins(css2):
    """Yield the (property, value) pairs of css2 with 'margin' normalised.

    Any 'margin' shorthand entries are moved ahead of all other
    properties, which otherwise keep their relative order. When all four
    of margin-left, margin-right, margin-top and margin-bottom are
    present, the 'margin' shorthand is dropped instead of being emitted.
    """
    sides = ('margin-left', 'margin-right', 'margin-top', 'margin-bottom')
    present = set(prop for prop, _ in css2)
    # These come from XML and we cannot preserve XML attribute order, so
    # when every side is given explicitly we assume that the shorthand
    # margin is the one to be overridden. See
    # https://bugs.launchpad.net/calibre/+bug/941134 and
    # https://bugs.launchpad.net/calibre/+bug/1002702
    drop_shorthand = all(side in present for side in sides)
    # Stable partition: shorthand entries first, everything else after in
    # the original relative order.
    shorthand = [pair for pair in css2 if pair[0] == 'margin']
    remainder = [pair for pair in css2 if pair[0] != 'margin']
    for prop, value in shorthand + remainder:
        if drop_shorthand and prop == 'margin':
            continue
        yield prop, value
for css2, names in css_styles.iteritems():
self.writeout("%s {\n" % ', '.join(names))
for style, val in css2:
for style, val in filter_margins(css2):
self.writeout("\t%s: %s;\n" % (style, val) )
self.writeout("}\n")
@ -941,20 +956,8 @@ ol, ul { padding-left: 2em; }
if self.currentstyle is None: # Added by Kovid
return
# Added by Kovid
names = {x[1]:x for x in attrs.iterkeys()}
ignore_keys = set()
if ('margin' in names and 'margin-top' in names and 'margin-left' in
names and 'margin-right' in names and 'margin-bottom' in
names):
# These come from XML and we cannot preserve XML attribute order so
# we assume that margin is to be overridden
# See https://bugs.launchpad.net/calibre/+bug/941134
ignore_keys.add(names['margin'])
for key,attr in attrs.items():
if key not in ignore_keys:
self.styledict[self.currentstyle][key] = attr
self.styledict[self.currentstyle][key] = attr
familymap = {'frame':'frame', 'paragraph':'p', 'presentation':'presentation',