Merge from trunk

Charles Haley 2011-03-22 12:57:34 +00:00
commit c5b2de9663
33 changed files with 796 additions and 516 deletions

recipes/caijing.recipe (new file, 79 lines added)

@ -0,0 +1,79 @@
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
class Caijing(BasicNewsRecipe):
title = 'Caijing Magazine'
__author__ = 'Eric Chen'
description = '''Bi-weekly Finance and Economics Review. Founded in 1998, the fortnightly CAIJING
Magazine has firmly established itself as a news authority and leading voice for
business and financial issues in China.
CAIJING Magazine closely tracks the most important aspects of China's economic reforms,
developments and policy changes, as well as major events in the capital markets. It also
offers a broad international perspective through first-hand reporting on international
political and economic issues.
CAIJING Magazine is China's most widely read business and finance magazine, with a
circulation of 225,000 per issue. It boasts top-level readers from government, business
and academic circles. '''
language = 'zh'
category = 'news, China'
encoding = 'UTF-8'
timefmt = ' [%a, %d %b, %Y]'
needs_subscription = True
remove_tags = [dict(attrs={'class':['topad', 'nav', 'searchbox', 'connav',
'mbx', 'bianji', 'bianji bj', 'lnewlist', 'rdtj', 'loadComment',
'conr', 'bottom', 'bottomcopyr', 'emaildy', 'rcom', 'allcontent']}),
dict(name=['script', 'noscript', 'style'])]
no_stylesheets = True
remove_javascript = True
current_issue_url = ""
current_issue_cover = ""
def get_browser(self):
br = BasicNewsRecipe.get_browser()
if self.username is not None and self.password is not None:
br.open('http://service.caijing.com.cn/usermanage/login')
br.select_form(name='mainLoginForm')
br['username'] = self.username
br['password'] = self.password
br.submit()
return br
def parse_index(self):
articles = []
soup0 = self.index_to_soup('http://magazine.caijing.com.cn/2011/cjindex2011/')
div = soup0.find('div', attrs={'class':'fmcon'})
link = div.find('a', href=True)
current_issue_url = link['href']
soup = self.index_to_soup(current_issue_url)
for div_cover in soup.findAll('img', {'src' : re.compile('.')}):
if re.search('\d{4}-\d{2}-\d{2}', div_cover['src']):
self.current_issue_cover = div_cover['src']
feeds = []
for section in soup.findAll('div', attrs={'class':'cebd'}):
section_title = self.tag_to_string(section.find('div', attrs={'class':'ceti'}))
articles = []
for post in section.findAll('a', href=True):
if re.search('\d{4}-\d{2}-\d{2}', post['href']):
date = re.search('\d{4}-\d{2}-\d{2}', post['href']).group(0)
id = re.search('\d{9}', post['href']).group(0)
url = re.sub(r'\d.*', 'templates/inc/chargecontent2.jsp?id=', post['href'])
url = url + id + '&time=' + date + '&cl=106&page=all'
title = self.tag_to_string(post)
articles.append({'title':title, 'url':url, 'date':date})
if articles:
feeds.append((section_title, articles))
return feeds
def get_cover_url(self):
return self.current_issue_cover


@ -8,13 +8,13 @@ __description__ = 'Providing context and clarity on national and international n
'''csmonitor.com'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class ChristianScienceMonitor(BasicNewsRecipe):
author = 'Kovid Goyal, Sujata Raman and Lorenzo Vigentini'
__author__ = 'Kovid Goyal'
description = 'Providing context and clarity on national and international news, peoples and cultures'
cover_url = 'http://www.csmonitor.com/extension/csm_base/design/csm_design/images/csmlogo_179x46.gif'
@ -34,6 +34,49 @@ class ChristianScienceMonitor(BasicNewsRecipe):
remove_javascript = True
no_stylesheets = True
def append_page(self, soup, appendtag, position):
nav = soup.find('div',attrs={'class':'navigation'})
if nav:
pager = nav.findAll('a')
for part in pager:
if 'Next' in part:
nexturl = ('http://www.csmonitor.com' +
re.findall(r'href="(.*?)"', str(part))[0])
soup2 = self.index_to_soup(nexturl)
texttag = soup2.find('div',
attrs={'class': re.compile('list-article-.*')})
trash_c = soup2.findAll(attrs={'class': 'list-description'})
trash_h = soup2.h1
for tc in trash_c: tc.extract()
trash_h.extract()
newpos = len(texttag.contents)
self.append_page(soup2, texttag, newpos)
texttag.extract()
appendtag.insert(position, texttag)
def preprocess_html(self, soup):
PRINT_RE = re.compile(r'/layout/set/print/content/view/print/[0-9]*')
html = str(soup)
try:
print_found = PRINT_RE.findall(html)
except Exception:
pass
if print_found:
print_url = 'http://www.csmonitor.com' + print_found[0]
print_soup = self.index_to_soup(print_url)
else:
self.append_page(soup, soup.body, 3)
trash_a = soup.findAll(attrs={'class': re.compile('navigation.*')})
trash_b = soup.findAll(attrs={'style': re.compile('.*')})
trash_d = soup.findAll(attrs={'class': 'sByline'})
for ta in trash_a: ta.extract()
for tb in trash_b: tb.extract()
for td in trash_d: td.extract()
print_soup = soup
return print_soup
preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
[
@ -43,7 +86,6 @@ class ChristianScienceMonitor(BasicNewsRecipe):
(r'Full HTML version of this story which may include photos, graphics, and related links.*</body>',
lambda match : '</body>'),
]]
extra_css = '''
h1{ color:#000000;font-family: Georgia,Times,"Times New Roman",serif; font-size: large}
.sub{ color:#000000;font-family: Georgia,Times,"Times New Roman",serif; font-size: small;}
@ -56,10 +98,9 @@ class ChristianScienceMonitor(BasicNewsRecipe):
#main{font-family:Arial,Tahoma,Verdana,Helvetica,sans-serif ; font-size: small; }
#photo-details{ font-family:Arial,Helvetica,sans-serif ; color:#999999; font-size: x-small;}
span.name{color:#205B87;font-family: Georgia,Times,"Times New Roman",serif; font-size: x-small}
p#dateline{color:#444444 ; font-family:Arial,Helvetica,sans-serif ; font-style:italic;}
'''
feeds = [
(u'Top Stories' , u'http://rss.csmonitor.com/feeds/top'),
p#dateline{color:#444444 ; font-family:Arial,Helvetica,sans-serif ; font-style:italic;} '''
feeds = [(u'Top Stories', u'http://rss.csmonitor.com/feeds/top'),
(u'World' , u'http://rss.csmonitor.com/feeds/world'),
(u'USA' , u'http://rss.csmonitor.com/feeds/usa'),
(u'Commentary' , u'http://rss.csmonitor.com/feeds/commentary'),
@ -74,9 +115,7 @@ class ChristianScienceMonitor(BasicNewsRecipe):
(u'Home Forum' , u'http://rss.csmonitor.com/feeds/homeforum')
]
keep_only_tags = [
dict(name='div', attrs={'id':'mainColumn'}),
]
keep_only_tags = [dict(name='div', attrs={'id':'mainColumn'}), ]
remove_tags = [
dict(name='div', attrs={'id':['story-tools','videoPlayer','storyRelatedBottom','enlarge-photo','photo-paginate']}),
@ -86,7 +125,10 @@ class ChristianScienceMonitor(BasicNewsRecipe):
'hide', 'podBrdr']}),
dict(name='ul', attrs={'class':[ 'centerliststories']}) ,
dict(name='form', attrs={'id':[ 'commentform']}) ,
dict(name='div', attrs={'class': ['ui-comments']})
]
remove_tags_after = [ dict(name='div', attrs={'class':[ 'ad csmAd']})]
remove_tags_after = [ dict(name='div', attrs={'class':[ 'ad csmAd']}),
dict(name='div', attrs={'class': [re.compile('navigation.*')]}),
dict(name='div', attrs={'style': [re.compile('.*')]})
]


@ -1,4 +1,3 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
@ -6,55 +5,48 @@ class WashingtonPost(BasicNewsRecipe):
title = 'Washington Post'
description = 'US political news'
__author__ = 'Kovid Goyal and Sujata Raman'
__author__ = 'Kovid Goyal'
use_embedded_content = False
max_articles_per_feed = 20
language = 'en'
encoding = 'utf-8'
remove_javascript = True
no_stylesheets = True
extra_css = '''
#articleCopyright { font-family:Arial,helvetica,sans-serif ; font-weight:bold ; font-size:x-small ;}
p { font-family:"Times New Roman",times,serif ; font-weight:normal ; font-size:small ;}
body{font-family:arial,helvetica,sans-serif}
'''
feeds = [ ('Today\'s Highlights', 'http://www.washingtonpost.com/wp-dyn/rss/linkset/2005/03/24/LI2005032400102.xml'),
('Politics', 'http://www.washingtonpost.com/wp-dyn/rss/politics/index.xml'),
('Nation', 'http://www.washingtonpost.com/wp-dyn/rss/nation/index.xml'),
('World', 'http://www.washingtonpost.com/wp-dyn/rss/world/index.xml'),
('Business', 'http://www.washingtonpost.com/wp-dyn/rss/business/index.xml'),
('Technology', 'http://www.washingtonpost.com/wp-dyn/rss/technology/index.xml'),
('Health', 'http://www.washingtonpost.com/wp-dyn/rss/health/index.xml'),
('Education', 'http://www.washingtonpost.com/wp-dyn/rss/education/index.xml'),
('Style',
'http://www.washingtonpost.com/wp-dyn/rss/print/style/index.xml'),
('NFL Sports',
'http://www.washingtonpost.com/wp-dyn/rss/sports/index/nfl/index.xml'),
('Redskins', 'http://www.washingtonpost.com/wp-dyn/rss/sports/redskins/index.xml'),
('Editorials', 'http://www.washingtonpost.com/wp-dyn/rss/linkset/2005/05/30/LI2005053000331.xml'),
feeds = [
('Politics', 'http://www.washingtonpost.com/rss/politics'),
('Nation', 'http://www.washingtonpost.com/rss/national'),
('World', 'http://www.washingtonpost.com/rss/world'),
('Business', 'http://www.washingtonpost.com/rss/business'),
('Lifestyle', 'http://www.washingtonpost.com/rss/lifestyle'),
('Sports', 'http://www.washingtonpost.com/rss/sports'),
('Redskins', 'http://www.washingtonpost.com/rss/sports/redskins'),
('Opinions', 'http://www.washingtonpost.com/rss/opinions'),
('Entertainment', 'http://www.washingtonpost.com/rss/entertainment'),
('Local', 'http://www.washingtonpost.com/rss/local'),
('Investigations',
'http://www.washingtonpost.com/rss/investigations'),
]
remove_tags = [{'id':['pfmnav', 'ArticleCommentsWrapper']}]
remove_tags = [
{'class':lambda x: x and 'article-toolbar' in x},
{'class':lambda x: x and 'quick-comments' in x},
{'class':lambda x: x and 'tweet' in x},
{'class':lambda x: x and 'article-related' in x},
{'class':lambda x: x and 'hidden' in x.split()},
{'class':lambda x: x and 'also-read' in x.split()},
{'class':lambda x: x and 'partners-content' in x.split()},
{'class':['module share', 'module ads', 'comment-vars', 'hidden',
'share-icons-wrap', 'comments']},
{'id':['right-rail']},
]
keep_only_tags = dict(id=['content', 'article'])
def get_article_url(self, article):
return article.get('guid', article.get('link', None))
def print_version(self, url):
return url.rpartition('.')[0] + '_pf.html'
url = url.rpartition('?')[0]
return url.replace('_story.html', '_singlePage.html')
def postprocess_html(self, soup, first):
for div in soup.findAll(name='div', style=re.compile('margin')):
div['style'] = ''
return soup
def preprocess_html(self, soup):
for tag in soup.findAll('font'):
if tag.has_key('size'):
if tag['size'] == '+2':
if tag.b:
return soup
return None
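The new print_version() above drops any query string with rpartition and then rewrites the story URL to its single-page form. A small illustration; the URL is made up and only the two string operations come from the recipe code:

url = 'http://www.washingtonpost.com/politics/some-article_story.html?hpid=z1'
url = url.rpartition('?')[0]
print(url.replace('_story.html', '_singlePage.html'))
# -> http://www.washingtonpost.com/politics/some-article_singlePage.html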


@ -18,6 +18,6 @@ def recipe_title_callback(raw):
return eval(raw.decode('utf-8'))
vipy.session.add_content_browser('.r', ',r', 'Recipe',
vipy.session.glob_based_iterator(os.path.join(project_dir, 'resources', 'recipes', '*.recipe')),
vipy.session.glob_based_iterator(os.path.join(project_dir, 'recipes', '*.recipe')),
vipy.session.regexp_based_matcher(r'title\s*=\s*(?P<title>.+)', 'title', recipe_title_callback))
EOFPY


@ -14,9 +14,9 @@ from setup.build_environment import HOST, PROJECT
BASE_RSYNC = ['rsync', '-avz', '--delete']
EXCLUDES = []
for x in [
'src/calibre/plugins', 'src/calibre/manual', 'src/calibre/trac',
'src/calibre/plugins', 'src/calibre/manual', 'src/calibre/trac', 'recipes',
'.bzr', '.build', '.svn', 'build', 'dist', 'imgsrc', '*.pyc', '*.pyo', '*.swp',
'*.swo']:
'*.swo', 'format_docs']:
EXCLUDES.extend(['--exclude', x])
SAFE_EXCLUDES = ['"%s"'%x if '*' in x else x for x in EXCLUDES]
@ -138,7 +138,7 @@ class VMInstaller(Command):
self.vm = self.VM
if not self.vmware_started():
self.start_vmware()
subprocess.call(['chmod', '-R', '+r', 'resources/recipes'])
subprocess.call(['chmod', '-R', '+r', 'recipes'])
self.start_vm()
self.download_installer()
if not self.dont_shutdown:


@ -6,7 +6,7 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import sys, os, shutil, glob, py_compile, subprocess, re
import sys, os, shutil, glob, py_compile, subprocess, re, zipfile, time
from setup import Command, modules, functions, basenames, __version__, \
__appname__
@ -40,6 +40,13 @@ DESCRIPTIONS = {
'calibre-smtp' : 'Command line interface for sending books via email',
}
def walk(dir):
''' A nice interface to os.walk '''
for record in os.walk(dir):
for f in record[-1]:
yield os.path.join(record[0], f)
class Win32Freeze(Command, WixMixIn):
description = 'Freeze windows calibre installation'
@ -63,12 +70,15 @@ class Win32Freeze(Command, WixMixIn):
self.rc_template = self.j(self.d(self.a(__file__)), 'template.rc')
self.py_ver = ''.join(map(str, sys.version_info[:2]))
self.lib_dir = self.j(self.base, 'Lib')
self.pydlib = self.j(self.base, 'pydlib')
self.pylib = self.j(self.base, 'pylib.zip')
self.initbase()
self.build_launchers()
self.freeze()
self.embed_manifests()
self.install_site_py()
self.archive_lib_dir()
self.create_installer()
def initbase(self):
@ -356,4 +366,108 @@ class Win32Freeze(Command, WixMixIn):
dest, lib]
self.run_builder(cmd)
def archive_lib_dir(self):
self.info('Putting all python code into a zip file for performance')
if os.path.exists(self.pydlib):
shutil.rmtree(self.pydlib)
os.makedirs(self.pydlib)
self.zf_timestamp = time.localtime(time.time())[:6]
self.zf_names = set()
with zipfile.ZipFile(self.pylib, 'w', zipfile.ZIP_STORED) as zf:
for x in os.listdir(self.lib_dir):
if x == 'site-packages':
continue
self.add_to_zipfile(zf, x, self.lib_dir)
sp = self.j(self.lib_dir, 'site-packages')
handled = set(['site.pyo'])
for pth in ('PIL.pth', 'pywin32.pth'):
handled.add(pth)
shutil.copyfile(self.j(sp, pth), self.j(self.pydlib, pth))
for d in self.get_pth_dirs(self.j(sp, pth)):
shutil.copytree(d, self.j(self.pydlib, self.b(d)), True)
handled.add(self.b(d))
handled.add('easy-install.pth')
for d in self.get_pth_dirs(self.j(sp, 'easy-install.pth')):
handled.add(self.b(d))
zip_safe = self.is_zip_safe(d)
for x in os.listdir(d):
if x == 'EGG-INFO':
continue
if zip_safe:
self.add_to_zipfile(zf, x, d)
else:
absp = self.j(d, x)
dest = self.j(self.pydlib, x)
if os.path.isdir(absp):
shutil.copytree(absp, dest, True)
else:
shutil.copy2(absp, dest)
for x in os.listdir(sp):
if x in handled or x.endswith('.egg-info'):
continue
absp = self.j(sp, x)
if os.path.isdir(absp):
if not os.listdir(absp):
continue
if self.is_zip_safe(absp):
self.add_to_zipfile(zf, x, sp)
else:
shutil.copytree(absp, self.j(self.pydlib, x), True)
else:
if x.endswith('.pyd'):
shutil.copy2(absp, self.j(self.pydlib, x))
else:
self.add_to_zipfile(zf, x, sp)
shutil.rmtree(self.lib_dir)
def is_zip_safe(self, path):
for f in walk(path):
ext = os.path.splitext(f)[1].lower()
if ext in ('.pyd', '.dll', '.exe'):
return False
return True
def get_pth_dirs(self, pth):
base = os.path.dirname(pth)
for line in open(pth).readlines():
line = line.strip()
if not line or line.startswith('#') or line.startswith('import'):
continue
if line == 'win32\\lib':
continue
candidate = self.j(base, line)
if os.path.exists(candidate):
yield candidate
def add_to_zipfile(self, zf, name, base, exclude=frozenset()):
abspath = self.j(base, name)
name = name.replace(os.sep, '/')
if name in self.zf_names:
raise ValueError('Already added %r to zipfile [%r]'%(name, abspath))
zinfo = zipfile.ZipInfo(filename=name, date_time=self.zf_timestamp)
if os.path.isdir(abspath):
if not os.listdir(abspath):
return
zinfo.external_attr = 0700 << 16
zf.writestr(zinfo, '')
for x in os.listdir(abspath):
if x not in exclude:
self.add_to_zipfile(zf, name + os.sep + x, base)
else:
ext = os.path.splitext(name)[1].lower()
if ext in ('.pyd', '.dll', '.exe'):
raise ValueError('Cannot add %r to zipfile'%abspath)
zinfo.external_attr = 0600 << 16
if ext in ('.py', '.pyc', '.pyo'):
with open(abspath, 'rb') as f:
zf.writestr(zinfo, f.read())
self.zf_names.add(name)
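Taken together with the site.py and launcher changes shown further down, the scheme here is: pure Python modules go into pylib.zip and are imported through Python's built-in zipimport support, while zip-unsafe material (.pyd extensions, packages containing DLLs) is copied to the pydlib directory. A minimal sketch of the resulting search path; the install location is hypothetical:

import sys

APP = r'C:\Program Files\Calibre2'        # hypothetical install directory
sys.path.insert(0, APP + '\\pydlib')      # zip-unsafe packages and .pyd files
sys.path.insert(0, APP + '\\pylib.zip')   # pure Python code, served by zipimport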


@ -96,7 +96,7 @@ def main():
abs__file__()
addsitedir(os.path.join(sys.app_dir, 'Lib', 'site-packages'))
addsitedir(os.path.join(sys.app_dir, 'pydlib'))
add_calibre_vars()


@ -198,7 +198,7 @@ void initialize_interpreter(wchar_t *outr, wchar_t *errr,
buf[strlen(buf)-1] = '\0';
_snprintf_s(python_home, MAX_PATH, _TRUNCATE, "%s", buf);
_snprintf_s(path, 3*MAX_PATH, _TRUNCATE, "%s\\DLLs;%s\\Lib;%s\\Lib\\site-packages",
_snprintf_s(path, 3*MAX_PATH, _TRUNCATE, "%s\\pylib.zip;%s\\pydlib;%s\\DLLs",
buf, buf, buf);
free(buf);


@ -154,9 +154,9 @@
<CustomAction Id="LaunchApplication" BinaryKey="WixCA"
DllEntry="WixShellExec" Impersonate="yes"/>
<InstallUISequence>
<!--<InstallUISequence>
<FileCost Suppress="yes" />
</InstallUISequence>
</InstallUISequence>-->
</Product>
</Wix>


@ -58,12 +58,14 @@ class ANDROID(USBMS):
0x413c : { 0xb007 : [0x0100, 0x0224]},
# LG
0x1004 : { 0x61cc : [0x100], 0x61ce : [0x100] },
0x1004 : { 0x61cc : [0x100], 0x61ce : [0x100], 0x618e : [0x226] },
# Archos
0x0e79 : {
0x1400 : [0x0222, 0x0216],
0x1408 : [0x0222, 0x0216],
0x1411 : [0x216],
0x1417 : [0x0216],
0x1419 : [0x0216],
0x1420 : [0x0216],
0x1422 : [0x0216]
@ -91,14 +93,14 @@ class ANDROID(USBMS):
VENDOR_NAME = ['HTC', 'MOTOROLA', 'GOOGLE_', 'ANDROID', 'ACER',
'GT-I5700', 'SAMSUNG', 'DELL', 'LINUX', 'GOOGLE', 'ARCHOS',
'TELECHIP', 'HUAWEI', 'T-MOBILE', 'SEMC']
'TELECHIP', 'HUAWEI', 'T-MOBILE', 'SEMC', 'LGE']
WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE',
'__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897',
'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID',
'SCH-I500_CARD', 'SPH-D700_CARD', 'MB810', 'GT-P1000', 'DESIRE',
'SGH-T849', '_MB300', 'A70S', 'S_ANDROID', 'A101IT', 'A70H',
'IDEOS_TABLET', 'MYTOUCH_4G', 'UMS_COMPOSITE', 'SCH-I800_CARD',
'7', 'A956', 'A955']
'7', 'A956', 'A955', 'A43', 'ANDROID_PLATFORM']
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
'A70S', 'A101IT', '7']


@ -19,7 +19,7 @@ class BLACKBERRY(USBMS):
VENDOR_ID = [0x0fca]
PRODUCT_ID = [0x8004, 0x0004]
BCD = [0x0200, 0x0107, 0x0210, 0x0201, 0x0211]
BCD = [0x0200, 0x0107, 0x0210, 0x0201, 0x0211, 0x0220]
VENDOR_NAME = 'RIM'
WINDOWS_MAIN_MEM = 'BLACKBERRY_SD'


@ -49,6 +49,8 @@ HEURISTIC_OPTIONS = ['markup_chapter_headings',
'dehyphenate', 'renumber_headings',
'replace_scene_breaks']
DEFAULT_TRUE_OPTIONS = HEURISTIC_OPTIONS + ['remove_fake_margins']
def print_help(parser, log):
help = parser.format_help().encode(preferred_encoding, 'replace')
log(help)
@ -90,7 +92,7 @@ def option_recommendation_to_cli_option(add_option, rec):
if opt.long_switch == 'verbose':
attrs['action'] = 'count'
attrs.pop('type', '')
if opt.name in HEURISTIC_OPTIONS and rec.recommended_value is True:
if opt.name in DEFAULT_TRUE_OPTIONS and rec.recommended_value is True:
switches = ['--disable-'+opt.long_switch]
add_option(Option(*switches, **attrs))
@ -162,6 +164,7 @@ def add_pipeline_options(parser, plumber):
'chapter', 'chapter_mark',
'prefer_metadata_cover', 'remove_first_image',
'insert_metadata', 'page_breaks_before',
'remove_fake_margins',
]
),
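Because remove_fake_margins is in DEFAULT_TRUE_OPTIONS and defaults to True, option_recommendation_to_cli_option() now generates a disable switch for it instead of an enable switch. A hedged example invocation; the file names are made up and the exact switch name is derived from the code above:

    ebook-convert book.epub book.mobi --disable-remove-fake-margins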


@ -304,6 +304,17 @@ OptionRecommendation(name='page_breaks_before',
'before the specified elements.')
),
OptionRecommendation(name='remove_fake_margins',
recommended_value=True, level=OptionRecommendation.LOW,
help=_('Some documents specify page margins by '
'specifying a left and right margin on each individual '
'paragraph. calibre will try to detect and remove these '
'margins. Sometimes, this can cause the removal of '
'margins that should not have been removed. In this '
'case you can disable the removal.')
),
OptionRecommendation(name='margin_top',
recommended_value=5.0, level=OptionRecommendation.LOW,
help=_('Set the top margin in pts. Default is %default. '
@ -988,9 +999,13 @@ OptionRecommendation(name='sr3_replace',
page_break_on_body=self.output_plugin.file_type in ('mobi',
'lit'))
flattener(self.oeb, self.opts)
self.opts.insert_blank_line = oibl
self.opts.remove_paragraph_spacing = orps
from calibre.ebooks.oeb.transforms.page_margin import RemoveFakeMargins
RemoveFakeMargins()(self.oeb, self.log, self.opts)
pr(0.9)
self.flush()
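The new remove_fake_margins option and the RemoveFakeMargins call above target books that fake a page margin by repeating it on every paragraph. A hypothetical sketch of the pattern, for orientation only:

# Input markup of the kind the transform is aimed at (made up):
#   <p style="margin-left: 30pt; margin-right: 30pt">First paragraph</p>
#   <p style="margin-left: 30pt; margin-right: 30pt">Second paragraph</p>
# Because virtually every paragraph shares the 30pt margins, the
# transform (page_margin.py, added later in this commit) strips them so
# that the global margin options apply instead.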


@ -28,11 +28,12 @@ class Worker(Thread): # {{{
Get book details from Amazon's book page in a separate thread
'''
def __init__(self, url, result_queue, browser, log, timeout=20):
def __init__(self, url, result_queue, browser, log, relevance, plugin, timeout=20):
Thread.__init__(self)
self.daemon = True
self.url, self.result_queue = url, result_queue
self.log, self.timeout = log, timeout
self.relevance, self.plugin = relevance, plugin
self.browser = browser.clone_browser()
self.cover_url = self.amazon_id = self.isbn = None
@ -161,6 +162,15 @@ class Worker(Thread): # {{{
else:
self.log.warning('Failed to find product description for url: %r'%self.url)
mi.source_relevance = self.relevance
if self.amazon_id:
if self.isbn:
self.plugin.cache_isbn_to_identifier(self.isbn, self.amazon_id)
if self.cover_url:
self.cache_identifier_to_cover_url(self.amazon_id,
self.cover_url)
self.result_queue.put(mi)
def parse_asin(self, root):
@ -321,6 +331,20 @@ class Amazon(Source):
# }}}
def get_cached_cover_url(self, identifiers):
url = None
asin = identifiers.get('amazon', None)
if asin is None:
asin = identifiers.get('asin', None)
if asin is None:
isbn = identifiers.get('isbn', None)
if isbn is not None:
asin = self.cached_isbn_to_identifier(isbn)
if asin is not None:
url = self.cached_identifier_to_cover_url(asin)
return url
def identify(self, log, result_queue, abort, title=None, authors=None, # {{{
identifiers={}, timeout=30):
'''
@ -396,7 +420,8 @@ class Amazon(Source):
log.error('No matches found with query: %r'%query)
return
workers = [Worker(url, result_queue, br, log) for url in matches]
workers = [Worker(url, result_queue, br, log, i, self) for i, url in
enumerate(matches)]
for w in workers:
w.start()
@ -414,14 +439,6 @@ class Amazon(Source):
if not a_worker_is_alive:
break
for w in workers:
if w.amazon_id:
if w.isbn:
self.cache_isbn_to_identifier(w.isbn, w.amazon_id)
if w.cover_url:
self.cache_identifier_to_cover_url(w.amazon_id,
w.cover_url)
return None
# }}}


@ -21,6 +21,21 @@ def create_log(ostream=None):
log.outputs = [FileStream(ostream)]
return log
words = ("the", "a", "an", "of", "and")
prefix_pat = re.compile(r'^(%s)\s+'%("|".join(words)))
trailing_paren_pat = re.compile(r'\(.*\)$')
whitespace_pat = re.compile(r'\s+')
def cleanup_title(s):
if not s:
s = _('Unknown')
s = s.strip().lower()
s = prefix_pat.sub(' ', s)
s = trailing_paren_pat.sub('', s)
s = whitespace_pat.sub(' ', s)
return s.strip()
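# Expected behaviour of cleanup_title() above; illustrative examples,
# assuming the default English locale for _('Unknown'):
#   cleanup_title('The Name of the Rose (Paperback)')  ->  'name of the rose'
#   cleanup_title(' A  Study   in Scarlet ')           ->  'study in scarlet'
#   cleanup_title(None)                                ->  'unknown'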
class Source(Plugin):
type = _('Metadata source')
@ -128,10 +143,91 @@ class Source(Plugin):
gr.append(job)
return [g for g in groups if g]
def test_fields(self, mi):
'''
Return the first field from self.touched_fields that is null on the
mi object
'''
for key in self.touched_fields:
if key.startswith('identifier:'):
key = key.partition(':')[-1]
if not mi.has_identifier(key):
return 'identifier: ' + key
elif mi.is_null(key):
return key
# }}}
# Metadata API {{{
def get_cached_cover_url(self, identifiers):
'''
Return cached cover URL for the book identified by
the identifiers dict or None if no such URL exists
'''
return None
def compare_identify_results(self, x, y, title=None, authors=None,
identifiers={}):
'''
Method used to sort the results from a call to identify by relevance.
Uses the actual query and various heuristics to rank results.
Re-implement in your plugin if this generic algorithm is not suitable.
Note that this method assumes x and y have a source_relevance
attribute.
one < two iff one is more relevant than two
'''
# First, guarantee that if the query specifies an ISBN, the result with
# the same isbn is the most relevant
def isbn_test(mi):
return mi.isbn and mi.isbn == identifiers.get('isbn', None)
def boolcmp(a, b):
return -1 if a and not b else 1 if not a and b else 0
x_has_isbn, y_has_isbn = isbn_test(x), isbn_test(y)
result = boolcmp(x_has_isbn, y_has_isbn)
if result != 0:
return result
# Now prefer results that have complete metadata over those that don't
x_has_all_fields = self.test_fields(x) is None
y_has_all_fields = self.test_fields(y) is None
result = boolcmp(x_has_all_fields, y_has_all_fields)
if result != 0:
return result
# Now prefer results whose title matches the search query
if title:
x_title = cleanup_title(x.title)
y_title = cleanup_title(y.title)
t = cleanup_title(title)
x_has_title, y_has_title = x_title == t, y_title == t
result = boolcmp(x_has_title, y_has_title)
if result != 0:
return result
# Now prefer results with the longer comments, within 10%
cx = len(x.comments.strip() if x.comments else '')
cy = len(y.comments.strip() if y.comments else '')
t = (cx + cy) / 20
result = cy - cx
if result != 0 and abs(cx - cy) > t:
return result
# Now prefer results with cached cover URLs
x_has_cover = self.get_cached_cover_url(x.identifiers) is not None
y_has_cover = self.get_cached_cover_url(y.identifiers) is not None
result = boolcmp(x_has_cover, y_has_cover)
if result != 0:
return result
# Now use the relevance reported by the remote search engine
return x.source_relevance - y.source_relevance
def identify(self, log, result_queue, abort, title=None, authors=None,
identifiers={}, timeout=5):
'''
@ -147,6 +243,15 @@ class Source(Plugin):
the same ISBN/special identifier does not need to get the cover URL
again. Use the caching API for this.
Every Metadata object put into result_queue by this method must have a
`source_relevance` attribute that is an integer indicating the order in
which the results were returned by the metadata source for this query.
This integer will be used by :meth:`compare_identify_results`. If the
order is unimportant, set it to zero for every result.
Make sure that any cover/isbn mapping information is cached before the
Metadata object is put into result_queue.
:param log: A log object, use it to output debugging information/errors
:param result_queue: A result Queue, results should be put into it.
Each result is a Metadata object
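A hedged sketch of how a caller might use compare_identify_results() to rank what a plugin has put on the result queue. This is illustrative only, not the actual calibre call site; drain_and_rank and its arguments are invented names:

from functools import partial
from Queue import Empty

def drain_and_rank(plugin, result_queue, title=None, authors=None,
                   identifiers={}):
    results = []
    while True:
        try:
            results.append(result_queue.get_nowait())
        except Empty:
            break
    # cmp-style comparator: a negative return means x is more relevant
    # than y, so a plain Python 2 sort puts the best match first
    results.sort(cmp=partial(plugin.compare_identify_results, title=title,
                             authors=authors, identifiers=identifiers))
    return results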


@ -190,14 +190,15 @@ class GoogleBooks(Source):
return raw and len(raw) > 17000 and raw[1:4] != 'PNG'
def get_all_details(self, br, log, entries, abort, result_queue, timeout):
for i in entries:
for relevance, i in enumerate(entries):
try:
ans = to_metadata(br, log, i, timeout)
if isinstance(ans, Metadata):
result_queue.put(ans)
ans.source_relevance = relevance
for isbn in getattr(ans, 'all_isbns', []):
self.cache_isbn_to_identifier(isbn,
ans.identifiers['google'])
result_queue.put(ans)
except:
log.exception(
'Failed to get metadata for identify entry:',


@ -46,15 +46,6 @@ def authors_test(authors):
return test
def _test_fields(touched_fields, mi):
for key in touched_fields:
if key.startswith('identifier:'):
key = key.partition(':')[-1]
if not mi.has_identifier(key):
return 'identifier: ' + key
elif mi.is_null(key):
return key
def test_identify_plugin(name, tests):
'''
@ -120,11 +111,10 @@ def test_identify_plugin(name, tests):
prints('Log saved to', lf)
raise SystemExit(1)
good = [x for x in possibles if _test_fields(plugin.touched_fields, x) is
good = [x for x in possibles if plugin.test_fields(x) is
None]
if not good:
prints('Failed to find', _test_fields(plugin.touched_fields,
possibles[0]))
prints('Failed to find', plugin.test_fields(possibles[0]))
raise SystemExit(1)


@ -0,0 +1,153 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from collections import Counter
from calibre.ebooks.oeb.base import OEB_STYLES, barename, XPath
class RemoveFakeMargins(object):
'''
Remove left and right margins from paragraph/divs if the same margin is specified
on almost all the elements at that level.
Must be called only after CSS flattening
'''
def __call__(self, oeb, log, opts):
if not opts.remove_fake_margins:
return
self.oeb, self.log, self.opts = oeb, log, opts
stylesheet = None
self.levels = {}
self.stats = {}
self.selector_map = {}
for item in self.oeb.manifest:
if item.media_type.lower() in OEB_STYLES:
stylesheet = item
break
if stylesheet is None:
return
self.log('Removing fake margins...')
stylesheet = stylesheet.data
from cssutils.css import CSSRule
for rule in stylesheet.cssRules.rulesOfType(CSSRule.STYLE_RULE):
self.selector_map[rule.selectorList.selectorText] = rule.style
self.find_levels()
for level in self.levels:
self.process_level(level)
def get_margins(self, elem):
cls = elem.get('class', None)
if cls:
style = self.selector_map.get('.'+cls, None)
if style:
return style.marginLeft, style.marginRight, style
return '', '', None
def process_level(self, level):
elems = self.levels[level]
self.stats[level+'_left'] = Counter()
self.stats[level+'_right'] = Counter()
for elem in elems:
lm, rm = self.get_margins(elem)[:2]
self.stats[level+'_left'][lm] += 1
self.stats[level+'_right'][rm] += 1
self.log.debug(level, ' left margin stats:', self.stats[level+'_left'])
self.log.debug(level, ' right margin stats:', self.stats[level+'_right'])
remove_left = self.analyze_stats(self.stats[level+'_left'])
remove_right = self.analyze_stats(self.stats[level+'_right'])
if remove_left:
mcl = self.stats[level+'_left'].most_common(1)[0][0]
self.log('Removing level %s left margin of:'%level, mcl)
if remove_right:
mcr = self.stats[level+'_right'].most_common(1)[0][0]
self.log('Removing level %s right margin of:'%level, mcr)
if remove_left or remove_right:
for elem in elems:
lm, rm, style = self.get_margins(elem)
if remove_left and lm == mcl:
style.removeProperty('margin-left')
if remove_right and rm == mcr:
style.removeProperty('margin-right')
def find_levels(self):
def level_of(elem, body):
ans = 1
while elem.getparent() is not body:
ans += 1
elem = elem.getparent()
return ans
paras = XPath('descendant::h:p|descendant::h:div')
for item in self.oeb.spine:
body = XPath('//h:body')(item.data)
if not body:
continue
body = body[0]
for p in paras(body):
level = level_of(p, body)
level = '%s_%d'%(barename(p.tag), level)
if level not in self.levels:
self.levels[level] = []
self.levels[level].append(p)
remove = set()
for k, v in self.levels.iteritems():
num = len(v)
self.log.debug('Found %d items of level:'%num, k)
level = int(k.split('_')[-1])
tag = k.split('_')[0]
if tag == 'p' and num < 25:
remove.add(k)
if tag == 'div':
if level > 2 and num < 25:
remove.add(k)
elif level < 3:
# Check each level < 3 element and only keep those
# that have many child paras
for elem in list(v):
children = len(paras(elem))
if children < 5:
v.remove(elem)
for k in remove:
self.levels.pop(k)
self.log.debug('Ignoring level', k)
def analyze_stats(self, stats):
if not stats:
return False
mc = stats.most_common(1)
if len(mc) > 1:
return False
mc = mc[0]
most_common, most_common_count = mc
if not most_common or most_common == '0':
return False
total = sum(stats.values())
# True if greater than 95% of elements have the same margin
return most_common_count/total > 0.95
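A standalone sketch of the majority-margin heuristic that analyze_stats() and process_level() implement above: a margin is only treated as fake when more than 95% of the elements at a level share a non-zero value. The function and data below are illustrative, not calibre API:

from collections import Counter

def fake_margin(margins, threshold=0.95):
    stats = Counter(margins)
    if not stats:
        return None
    value, count = stats.most_common(1)[0]
    if not value or value == '0':
        return None   # nothing meaningful to remove
    if float(count) / sum(stats.values()) > threshold:
        return value  # shared by nearly every element: treat as fake
    return None

print(fake_margin(['30pt'] * 98 + ['0'] * 2))    # '30pt', would be stripped
print(fake_margin(['30pt'] * 60 + ['0'] * 40))   # None, left alone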


@ -22,6 +22,7 @@ border_style_map = {
'dot-dot-dash': 'dotted',
'outset': 'outset',
'tripple': 'double',
'triple': 'double',
'thick-thin-small': 'solid',
'thin-thick-small': 'solid',
'thin-thick-thin-small': 'solid',


@ -121,97 +121,108 @@ class Textile(object):
btag = ('bq', 'bc', 'notextile', 'pre', 'h[1-6]', 'fn\d+', 'p')
btag_lite = ('bq', 'bc', 'p')
glyph_defaults = (
('mac_cent', '&#162;'),
('mac_pound', '&#163;'),
('mac_yen', '&#165;'),
('mac_quarter', '&#188;'),
('mac_half', '&#189;'),
('mac_three-quarter', '&#190;'),
('mac_cA-grave', '&#192;'),
('mac_cA-acute', '&#193;'),
('mac_cA-circumflex', '&#194;'),
('mac_cA-tilde', '&#195;'),
('mac_cA-diaeresis', '&#196;'),
('mac_cA-ring', '&#197;'),
('mac_cAE', '&#198;'),
('mac_cC-cedilla', '&#199;'),
('mac_cE-grave', '&#200;'),
('mac_cE-acute', '&#201;'),
('mac_cE-circumflex', '&#202;'),
('mac_cE-diaeresis', '&#203;'),
('mac_cI-grave', '&#204;'),
('mac_cI-acute', '&#205;'),
('mac_cI-circumflex', '&#206;'),
('mac_cI-diaeresis', '&#207;'),
('mac_cEth', '&#208;'),
('mac_cN-tilde', '&#209;'),
('mac_cO-grave', '&#210;'),
('mac_cO-acute', '&#211;'),
('mac_cO-circumflex', '&#212;'),
('mac_cO-tilde', '&#213;'),
('mac_cO-diaeresis', '&#214;'),
('mac_cO-stroke', '&#216;'),
('mac_cU-grave', '&#217;'),
('mac_cU-acute', '&#218;'),
('mac_cU-circumflex', '&#219;'),
('mac_cU-diaeresis', '&#220;'),
('mac_cY-acute', '&#221;'),
('mac_sa-grave', '&#224;'),
('mac_sa-acute', '&#225;'),
('mac_sa-circumflex', '&#226;'),
('mac_sa-tilde', '&#227;'),
('mac_sa-diaeresis', '&#228;'),
('mac_sa-ring', '&#229;'),
('mac_sae', '&#230;'),
('mac_sc-cedilla', '&#231;'),
('mac_se-grave', '&#232;'),
('mac_se-acute', '&#233;'),
('mac_se-circumflex', '&#234;'),
('mac_se-diaeresis', '&#235;'),
('mac_si-grave', '&#236;'),
('mac_si-acute', '&#237;'),
('mac_si-circumflex', '&#238;'),
('mac_si-diaeresis', '&#239;'),
('mac_sn-tilde', '&#241;'),
('mac_so-grave', '&#242;'),
('mac_so-acute', '&#243;'),
('mac_so-circumflex', '&#244;'),
('mac_so-tilde', '&#245;'),
('mac_so-diaeresis', '&#246;'),
('mac_so-stroke', '&#248;'),
('mac_su-grave', '&#249;'),
('mac_su-acute', '&#250;'),
('mac_su-circumflex', '&#251;'),
('mac_su-diaeresis', '&#252;'),
('mac_sy-acute', '&#253;'),
('mac_sy-diaeresis', '&#255;'),
('mac_cOE', '&#338;'),
('mac_soe', '&#339;'),
('mac_bullet', '&#8226;'),
('mac_franc', '&#8355;'),
('mac_lira', '&#8356;'),
('mac_rupee', '&#8360;'),
('mac_euro', '&#8364;'),
('mac_spade', '&#9824;'),
('mac_club', '&#9827;'),
('mac_heart', '&#9829;'),
('mac_diamond', '&#9830;'),
('txt_dimension', '&#215;'),
('txt_quote_single_open', '&#8216;'),
('txt_quote_single_close', '&#8217;'),
('txt_quote_double_open', '&#8220;'),
('txt_quote_double_close', '&#8221;'),
('txt_apostrophe', '&#8217;'),
('txt_prime', '&#8242;'),
('txt_prime_double', '&#8243;'),
('txt_ellipsis', '&#8230;'),
('txt_emdash', '&#8212;'),
('txt_endash', '&#8211;'),
('txt_trademark', '&#8482;'),
('txt_registered', '&#174;'),
('txt_copyright', '&#169;'),
)
macro_defaults = [
(re.compile(r'{(c\||\|c)}'), r'&#162;'), # cent
(re.compile(r'{(L-|-L)}'), r'&#163;'), # pound
(re.compile(r'{(Y=|=Y)}'), r'&#165;'), # yen
(re.compile(r'{\(c\)}'), r'&#169;'), # copyright
(re.compile(r'{\(r\)}'), r'&#174;'), # registered
(re.compile(r'{(\+_|_\+)}'), r'&#177;'), # plus-minus
(re.compile(r'{1/4}'), r'&#188;'), # quarter
(re.compile(r'{1/2}'), r'&#189;'), # half
(re.compile(r'{3/4}'), r'&#190;'), # three-quarter
(re.compile(r'{(A`|`A)}'), r'&#192;'), # A-grave
(re.compile(r'{(A\'|\'A)}'), r'&#193;'), # A-acute
(re.compile(r'{(A\^|\^A)}'), r'&#194;'), # A-circumflex
(re.compile(r'{(A~|~A)}'), r'&#195;'), # A-tilde
(re.compile(r'{(A\"|\"A)}'), r'&#196;'), # A-diaeresis
(re.compile(r'{(Ao|oA)}'), r'&#197;'), # A-ring
(re.compile(r'{(AE)}'), r'&#198;'), # AE
(re.compile(r'{(C,|,C)}'), r'&#199;'), # C-cedilla
(re.compile(r'{(E`|`E)}'), r'&#200;'), # E-grave
(re.compile(r'{(E\'|\'E)}'), r'&#201;'), # E-acute
(re.compile(r'{(E\^|\^E)}'), r'&#202;'), # E-circumflex
(re.compile(r'{(E\"|\"E)}'), r'&#203;'), # E-diaeresis
(re.compile(r'{(I`|`I)}'), r'&#204;'), # I-grave
(re.compile(r'{(I\'|\'I)}'), r'&#205;'), # I-acute
(re.compile(r'{(I\^|\^I)}'), r'&#206;'), # I-circumflex
(re.compile(r'{(I\"|\"I)}'), r'&#207;'), # I-diaeresis
(re.compile(r'{(D-|-D)}'), r'&#208;'), # ETH
(re.compile(r'{(N~|~N)}'), r'&#209;'), # N-tilde
(re.compile(r'{(O`|`O)}'), r'&#210;'), # O-grave
(re.compile(r'{(O\'|\'O)}'), r'&#211;'), # O-acute
(re.compile(r'{(O\^|\^O)}'), r'&#212;'), # O-circumflex
(re.compile(r'{(O~|~O)}'), r'&#213;'), # O-tilde
(re.compile(r'{(O\"|\"O)}'), r'&#214;'), # O-diaeresis
(re.compile(r'{x}'), r'&#215;'), # dimension
(re.compile(r'{(O\/|\/O)}'), r'&#216;'), # O-slash
(re.compile(r'{(U`|`U)}'), r'&#217;'), # U-grave
(re.compile(r'{(U\'|\'U)}'), r'&#218;'), # U-acute
(re.compile(r'{(U\^|\^U)}'), r'&#219;'), # U-circumflex
(re.compile(r'{(U\"|\"U)}'), r'&#220;'), # U-diaeresis
(re.compile(r'{(Y\'|\'Y)}'), r'&#221;'), # Y-acute
(re.compile(r'{sz}'), r'&szlig;'), # sharp-s
(re.compile(r'{(a`|`a)}'), r'&#224;'), # a-grave
(re.compile(r'{(a\'|\'a)}'), r'&#225;'), # a-acute
(re.compile(r'{(a\^|\^a)}'), r'&#226;'), # a-circumflex
(re.compile(r'{(a~|~a)}'), r'&#227;'), # a-tilde
(re.compile(r'{(a\"|\"a)}'), r'&#228;'), # a-diaeresis
(re.compile(r'{(ao|oa)}'), r'&#229;'), # a-ring
(re.compile(r'{ae}'), r'&#230;'), # ae
(re.compile(r'{(c,|,c)}'), r'&#231;'), # c-cedilla
(re.compile(r'{(e`|`e)}'), r'&#232;'), # e-grave
(re.compile(r'{(e\'|\'e)}'), r'&#233;'), # e-acute
(re.compile(r'{(e\^|\^e)}'), r'&#234;'), # e-circumflex
(re.compile(r'{(e\"|\"e)}'), r'&#235;'), # e-diaeresis
(re.compile(r'{(i`|`i)}'), r'&#236;'), # i-grave
(re.compile(r'{(i\'|\'i)}'), r'&#237;'), # i-acute
(re.compile(r'{(i\^|\^i)}'), r'&#238;'), # i-circumflex
(re.compile(r'{(i\"|\"i)}'), r'&#239;'), # i-diaeresis
(re.compile(r'{(d-|-d)}'), r'&#240;'), # eth
(re.compile(r'{(n~|~n)}'), r'&#241;'), # n-tilde
(re.compile(r'{(o`|`o)}'), r'&#242;'), # o-grave
(re.compile(r'{(o\'|\'o)}'), r'&#243;'), # o-acute
(re.compile(r'{(o\^|\^o)}'), r'&#244;'), # o-circumflex
(re.compile(r'{(o~|~o)}'), r'&#245;'), # o-tilde
(re.compile(r'{(o\"|\"o)}'), r'&#246;'), # o-diaeresis
(re.compile(r'{(o\/|\/o)}'), r'&#248;'), # o-stroke
(re.compile(r'{(u`|`u)}'), r'&#249;'), # u-grave
(re.compile(r'{(u\'|\'u)}'), r'&#250;'), # u-acute
(re.compile(r'{(u\^|\^u)}'), r'&#251;'), # u-circumflex
(re.compile(r'{(u\"|\"u)}'), r'&#252;'), # u-diaeresis
(re.compile(r'{(y\'|\'y)}'), r'&#253;'), # y-acute
(re.compile(r'{(y\"|\"y)}'), r'&#255;'), # y-diaeresis
(re.compile(r'{OE}'), r'&#338;'), # OE
(re.compile(r'{oe}'), r'&#339;'), # oe
(re.compile(r'{(S\^|\^S)}'), r'&Scaron;'), # Scaron
(re.compile(r'{(s\^|\^s)}'), r'&scaron;'), # scaron
(re.compile(r'{\*}'), r'&#8226;'), # bullet
(re.compile(r'{Fr}'), r'&#8355;'), # Franc
(re.compile(r'{(L=|=L)}'), r'&#8356;'), # Lira
(re.compile(r'{Rs}'), r'&#8360;'), # Rupee
(re.compile(r'{(C=|=C)}'), r'&#8364;'), # euro
(re.compile(r'{tm}'), r'&#8482;'), # trademark
(re.compile(r'{spades?}'), r'&#9824;'), # spade
(re.compile(r'{clubs?}'), r'&#9827;'), # club
(re.compile(r'{hearts?}'), r'&#9829;'), # heart
(re.compile(r'{diam(onds?|s)}'), r'&#9830;'), # diamond
]
glyph_defaults = [
(re.compile(r'(\d+\'?\"?)( ?)x( ?)(?=\d+)'), r'\1\2&#215;\3'), # dimension sign
(re.compile(r'(\d+)\'', re.I), r'\1&#8242;'), # prime
(re.compile(r'(\d+)\"', re.I), r'\1&#8243;'), # prime-double
(re.compile(r'\b([A-Z][A-Z0-9]{2,})\b(?:[(]([^)]*)[)])'), r'<acronym title="\2">\1</acronym>'), # 3+ uppercase acronym
(re.compile(r'\b([A-Z][A-Z\'\-]+[A-Z])(?=[\s.,\)>])'), r'<span class="caps">\1</span>'), # 3+ uppercase
(re.compile(r'\b(\s{0,1})?\.{3}'), r'\1&#8230;'), # ellipsis
(re.compile(r'^[\*_-]{3,}$', re.M), r'<hr />'), # <hr> scene-break
(re.compile(r'\b--\b'), r'&#8212;'), # em dash
(re.compile(r'(\s)--(\s)'), r'\1&#8212;\2'), # em dash
(re.compile(r'\s-(?:\s|$)'), r' &#8211; '), # en dash
(re.compile(r'\b( ?)[([]TM[])]', re.I), r'\1&#8482;'), # trademark
(re.compile(r'\b( ?)[([]R[])]', re.I), r'\1&#174;'), # registered
(re.compile(r'\b( ?)[([]C[])]', re.I), r'\1&#169;'), # copyright
]
def __init__(self, restricted=False, lite=False, noimage=False):
"""docstring for __init__"""
@ -673,211 +684,15 @@ class Textile(object):
# fix: hackish
text = re.sub(r'"\Z', '\" ', text)
glyph_search = (
re.compile(r'(\d+\'?\"?)( ?)x( ?)(?=\d+)'), # dimension sign
re.compile(r"(\w)\'(\w)"), # apostrophe's
re.compile(r'(\s)\'(\d+\w?)\b(?!\')'), # back in '88
re.compile(r'(\S)\'(?=\s|'+self.pnct+'|<|$)'), # single closing
re.compile(r'\'/'), # single opening
re.compile(r'(\")\"'), # double closing - following another
re.compile(r'(\S)\"(?=\s|'+self.pnct+'|<|$)'), # double closing
re.compile(r'"'), # double opening
re.compile(r'\b([A-Z][A-Z0-9]{2,})\b(?:[(]([^)]*)[)])'), # 3+ uppercase acronym
re.compile(r'\b([A-Z][A-Z\'\-]+[A-Z])(?=[\s.,\)>])'), # 3+ uppercase
re.compile(r'\b(\s{0,1})?\.{3}'), # ellipsis
re.compile(r'(\s?)--(\s?)'), # em dash
re.compile(r'\s-(?:\s|$)'), # en dash
re.compile(r'\b( ?)[([]TM[])]', re.I), # trademark
re.compile(r'\b( ?)[([]R[])]', re.I), # registered
re.compile(r'\b( ?)[([]C[])]', re.I) # copyright
)
glyph_replace = [x % dict(self.glyph_defaults) for x in (
r'\1\2%(txt_dimension)s\3', # dimension sign
r'\1%(txt_apostrophe)s\2', # apostrophe's
r'\1%(txt_apostrophe)s\2', # back in '88
r'\1%(txt_quote_single_close)s', # single closing
r'%(txt_quote_single_open)s', # single opening
r'\1%(txt_quote_double_close)s', # double closing - following another
r'\1%(txt_quote_double_close)s', # double closing
r'%(txt_quote_double_open)s', # double opening
r'<acronym title="\2">\1</acronym>', # 3+ uppercase acronym
r'<span class="caps">\1</span>', # 3+ uppercase
r'\1%(txt_ellipsis)s', # ellipsis
r'\1%(txt_emdash)s\2', # em dash
r' %(txt_endash)s ', # en dash
r'\1%(txt_trademark)s', # trademark
r'\1%(txt_registered)s', # registered
r'\1%(txt_copyright)s' # copyright
)]
if re.search(r'{.+?}', text):
glyph_search += (
re.compile(r'{(c\||\|c)}'), # cent
re.compile(r'{(L-|-L)}'), # pound
re.compile(r'{(Y=|=Y)}'), # yen
re.compile(r'{\(c\)}'), # copyright
re.compile(r'{\(r\)}'), # registered
re.compile(r'{1/4}'), # quarter
re.compile(r'{1/2}'), # half
re.compile(r'{3/4}'), # three-quarter
re.compile(r'{(A`|`A)}'), # 192;
re.compile(r'{(A\'|\'A)}'), # 193;
re.compile(r'{(A\^|\^A)}'), # 194;
re.compile(r'{(A~|~A)}'), # 195;
re.compile(r'{(A\"|\"A)}'), # 196;
re.compile(r'{(Ao|oA)}'), # 197;
re.compile(r'{(AE)}'), # 198;
re.compile(r'{(C,|,C)}'), # 199;
re.compile(r'{(E`|`E)}'), # 200;
re.compile(r'{(E\'|\'E)}'), # 201;
re.compile(r'{(E\^|\^E)}'), # 202;
re.compile(r'{(E\"|\"E)}'), # 203;
re.compile(r'{(I`|`I)}'), # 204;
re.compile(r'{(I\'|\'I)}'), # 205;
re.compile(r'{(I\^|\^I)}'), # 206;
re.compile(r'{(I\"|\"I)}'), # 207;
re.compile(r'{(D-|-D)}'), # 208;
re.compile(r'{(N~|~N)}'), # 209;
re.compile(r'{(O`|`O)}'), # 210;
re.compile(r'{(O\'|\'O)}'), # 211;
re.compile(r'{(O\^|\^O)}'), # 212;
re.compile(r'{(O~|~O)}'), # 213;
re.compile(r'{(O\"|\"O)}'), # 214;
re.compile(r'{(O\/|\/O)}'), # 215;
re.compile(r'{(U`|`U)}'), # 216;
re.compile(r'{(U\'|\'U)}'), # 217;
re.compile(r'{(U\^|\^U)}'), # 218;
re.compile(r'{(U\"|\"U)}'), # 219;
re.compile(r'{(Y\'|\'Y)}'), # 220;
re.compile(r'{(a`|`a)}'), # a-grace
re.compile(r'{(a\'|\'a)}'), # a-acute
re.compile(r'{(a\^|\^a)}'), # a-circumflex
re.compile(r'{(a~|~a)}'), # a-tilde
re.compile(r'{(a\"|\"a)}'), # a-diaeresis
re.compile(r'{(ao|oa)}'), # a-ring
re.compile(r'{ae}'), # ae
re.compile(r'{(c,|,c)}'), # c-cedilla
re.compile(r'{(e`|`e)}'), # e-grace
re.compile(r'{(e\'|\'e)}'), # e-acute
re.compile(r'{(e\^|\^e)}'), # e-circumflex
re.compile(r'{(e\"|\"e)}'), # e-diaeresis
re.compile(r'{(i`|`i)}'), # i-grace
re.compile(r'{(i\'|\'i)}'), # i-acute
re.compile(r'{(i\^|\^i)}'), # i-circumflex
re.compile(r'{(i\"|\"i)}'), # i-diaeresis
re.compile(r'{(n~|~n)}'), # n-tilde
re.compile(r'{(o`|`o)}'), # o-grace
re.compile(r'{(o\'|\'o)}'), # o-acute
re.compile(r'{(o\^|\^o)}'), # o-circumflex
re.compile(r'{(o~|~o)}'), # o-tilde
re.compile(r'{(o\"|\"o)}'), # o-diaeresis
re.compile(r'{(o\/|\/o)}'), # o-stroke
re.compile(r'{(u`|`u)}'), # u-grace
re.compile(r'{(u\'|\'u)}'), # u-acute
re.compile(r'{(u\^|\^u)}'), # u-circumflex
re.compile(r'{(u\"|\"u)}'), # u-diaeresis
re.compile(r'{(y\'|\'y)}'), # y-acute
re.compile(r'{(y\"|\"y)}'), # y-diaeresis
re.compile(r'{OE}'), # y-diaeresis
re.compile(r'{oe}'), # y-diaeresis
re.compile(r'{\*}'), # bullet
re.compile(r'{Fr}'), # Franc
re.compile(r'{(L=|=L)}'), # Lira
re.compile(r'{Rs}'), # Rupee
re.compile(r'{(C=|=C)}'), # euro
re.compile(r'{tm}'), # euro
re.compile(r'{spade}'), # spade
re.compile(r'{club}'), # club
re.compile(r'{heart}'), # heart
re.compile(r'{diamond}') # diamond
)
glyph_replace += [x % dict(self.glyph_defaults) for x in (
r'%(mac_cent)s', # cent
r'%(mac_pound)s', # pound
r'%(mac_yen)s', # yen
r'%(txt_copyright)s', # copyright
r'%(txt_registered)s', # registered
r'%(mac_quarter)s', # quarter
r'%(mac_half)s', # half
r'%(mac_three-quarter)s', # three-quarter
r'%(mac_cA-grave)s', # 192;
r'%(mac_cA-acute)s', # 193;
r'%(mac_cA-circumflex)s', # 194;
r'%(mac_cA-tilde)s', # 195;
r'%(mac_cA-diaeresis)s', # 196;
r'%(mac_cA-ring)s', # 197;
r'%(mac_cAE)s', # 198;
r'%(mac_cC-cedilla)s', # 199;
r'%(mac_cE-grave)s', # 200;
r'%(mac_cE-acute)s', # 201;
r'%(mac_cE-circumflex)s', # 202;
r'%(mac_cE-diaeresis)s', # 203;
r'%(mac_cI-grave)s', # 204;
r'%(mac_cI-acute)s', # 205;
r'%(mac_cI-circumflex)s', # 206;
r'%(mac_cI-diaeresis)s', # 207;
r'%(mac_cEth)s', # 208;
r'%(mac_cN-tilde)s', # 209;
r'%(mac_cO-grave)s', # 210;
r'%(mac_cO-acute)s', # 211;
r'%(mac_cO-circumflex)s', # 212;
r'%(mac_cO-tilde)s', # 213;
r'%(mac_cO-diaeresis)s', # 214;
r'%(mac_cO-stroke)s', # 216;
r'%(mac_cU-grave)s', # 217;
r'%(mac_cU-acute)s', # 218;
r'%(mac_cU-circumflex)s', # 219;
r'%(mac_cU-diaeresis)s', # 220;
r'%(mac_cY-acute)s', # 221;
r'%(mac_sa-grave)s', # 224;
r'%(mac_sa-acute)s', # 225;
r'%(mac_sa-circumflex)s', # 226;
r'%(mac_sa-tilde)s', # 227;
r'%(mac_sa-diaeresis)s', # 228;
r'%(mac_sa-ring)s', # 229;
r'%(mac_sae)s', # 230;
r'%(mac_sc-cedilla)s', # 231;
r'%(mac_se-grave)s', # 232;
r'%(mac_se-acute)s', # 233;
r'%(mac_se-circumflex)s', # 234;
r'%(mac_se-diaeresis)s', # 235;
r'%(mac_si-grave)s', # 236;
r'%(mac_si-acute)s', # 237;
r'%(mac_si-circumflex)s', # 238;
r'%(mac_si-diaeresis)s', # 239;
r'%(mac_sn-tilde)s', # 241;
r'%(mac_so-grave)s', # 242;
r'%(mac_so-acute)s', # 243;
r'%(mac_so-circumflex)s', # 244;
r'%(mac_so-tilde)s', # 245;
r'%(mac_so-diaeresis)s', # 246;
r'%(mac_so-stroke)s', # 248;
r'%(mac_su-grave)s', # 249;
r'%(mac_su-acute)s', # 250;
r'%(mac_su-circumflex)s', # 251;
r'%(mac_su-diaeresis)s', # 252;
r'%(mac_sy-acute)s', # 253;
r'%(mac_sy-diaeresis)s', # 255;
r'%(mac_cOE)s', # 338;
r'%(mac_soe)s', # 339;
r'%(mac_bullet)s', # bullet
r'%(mac_franc)s', # franc
r'%(mac_lira)s', # lira
r'%(mac_rupee)s', # rupee
r'%(mac_euro)s', # euro
r'%(txt_trademark)s', # trademark
r'%(mac_spade)s', # spade
r'%(mac_club)s', # club
r'%(mac_heart)s', # heart
r'%(mac_diamond)s' # diamond
)]
result = []
for line in re.compile(r'(<.*?>)', re.U).split(text):
if not re.search(r'<.*>', line):
for s, r in zip(glyph_search, glyph_replace):
rules = []
if re.search(r'{.+?}', line):
rules = self.macro_defaults + self.glyph_defaults
else:
rules = self.glyph_defaults
for s, r in rules:
line = s.sub(r, line)
result.append(line)
return ''.join(result)
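The rewritten loop above selects its substitution rules per line: macro_defaults are only consulted when the line actually contains a {...} macro, otherwise only glyph_defaults run. An illustrative example with made-up input:

# 'Price: {(c)} 2011 {C=}20'
#     goes through macro_defaults + glyph_defaults and becomes
# 'Price: &#169; 2011 &#8364;20'
# A line without any {...} in it gets only the glyph_defaults pass
# (primes, ellipses, dashes, (TM)/(R)/(C), acronyms and so on).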
@ -927,7 +742,7 @@ class Textile(object):
return url
def shelve(self, text):
id = str(uuid.uuid4())
id = str(uuid.uuid4()) + 'c'
self.shelf[id] = text
return id
@ -1049,7 +864,7 @@ class Textile(object):
for qtag in qtags:
pattern = re.compile(r"""
(?:^|(?<=[\s>%(pnct)s])|\[|([\]}]))
(?:^|(?<=[\s>%(pnct)s\(])|\[|([\]}]))
(%(qtag)s)(?!%(qtag)s)
(%(c)s)
(?::(\S+))?


@ -165,6 +165,7 @@ class TXTInput(InputFormatPlugin):
elif options.formatting_type == 'textile':
log.debug('Running text through textile conversion...')
html = convert_textile(txt)
setattr(options, 'smarten_punctuation', True)
else:
log.debug('Running text through basic conversion...')
flow_size = getattr(options, 'flow_size', 0)


@ -25,8 +25,11 @@ class PreferencesAction(InterfaceAction):
self.gui.run_wizard)
if not DEBUG:
pm.addSeparator()
pm.addAction(QIcon(I('debug.png')), _('Restart in debug mode'),
ac = pm.addAction(QIcon(I('debug.png')), _('Restart in debug mode'),
self.debug_restart)
ac.setShortcut('Ctrl+Shift+R')
self.gui.addAction(ac)
self.qaction.setMenu(pm)
self.preferences_menu = pm
for x in (self.gui.preferences_action, self.qaction):


@ -21,7 +21,7 @@ class StructureDetectionWidget(Widget, Ui_Form):
def __init__(self, parent, get_option, get_help, db=None, book_id=None):
Widget.__init__(self, parent,
['chapter', 'chapter_mark',
'remove_first_image',
'remove_first_image', 'remove_fake_margins',
'insert_metadata', 'page_breaks_before']
)
self.db, self.book_id = db, book_id


@ -48,10 +48,10 @@
</property>
</widget>
</item>
<item row="6" column="0" colspan="3">
<item row="7" column="0" colspan="3">
<widget class="XPathEdit" name="opt_page_breaks_before" native="true"/>
</item>
<item row="7" column="0" colspan="3">
<item row="8" column="0" colspan="3">
<spacer name="verticalSpacer">
<property name="orientation">
<enum>Qt::Vertical</enum>
@ -77,7 +77,7 @@
</property>
</spacer>
</item>
<item row="4" column="0" colspan="3">
<item row="5" column="0" colspan="3">
<widget class="QLabel" name="label_2">
<property name="text">
<string>The header and footer removal options have been replaced by the Search &amp; Replace options. Click the Search &amp; Replace category in the bar to the left to use these options. Leave the replace field blank and enter your header/footer removal regexps into the search field.</string>
@ -87,6 +87,13 @@
</property>
</widget>
</item>
<item row="2" column="2">
<widget class="QCheckBox" name="opt_remove_fake_margins">
<property name="text">
<string>Remove &amp;fake margins</string>
</property>
</widget>
</item>
</layout>
</widget>
<customwidgets>


@ -16,8 +16,7 @@ from PyQt4.Qt import Qt, QTreeView, QApplication, pyqtSignal, QFont, QSize, \
QIcon, QPoint, QVBoxLayout, QHBoxLayout, QComboBox, QTimer,\
QAbstractItemModel, QVariant, QModelIndex, QMenu, QFrame,\
QPushButton, QWidget, QItemDelegate, QString, QLabel, \
QShortcut, QKeySequence, SIGNAL, QMimeData, QSizePolicy,\
QToolButton
QShortcut, QKeySequence, SIGNAL, QMimeData, QToolButton
from calibre.ebooks.metadata import title_sort
from calibre.gui2 import config, NONE, gprefs
@ -1051,12 +1050,12 @@ class TagsModel(QAbstractItemModel): # {{{
if (key == 'authors' and len(ids) >= 5):
if not confirm('<p>'+_('Changing the authors for several books can '
'take a while. Are you sure?')
+'</p>', 'tag_browser_drop_authors', self.parent()):
+'</p>', 'tag_browser_drop_authors', self.tags_view):
return
elif len(ids) > 15:
if not confirm('<p>'+_('Changing the metadata for that many books '
'can take a while. Are you sure?')
+'</p>', 'tag_browser_many_changes', self.parent()):
+'</p>', 'tag_browser_many_changes', self.tags_view):
return
fm = self.db.metadata_for_field(key)


@ -12,18 +12,17 @@ __docformat__ = 'restructuredtext en'
import collections, os, sys, textwrap, time, gc
from Queue import Queue, Empty
from threading import Thread
from PyQt4.Qt import Qt, SIGNAL, QTimer, QHelpEvent, QAction, \
QMenu, QIcon, pyqtSignal, \
QDialog, QSystemTrayIcon, QApplication, QKeySequence
from PyQt4.Qt import (Qt, SIGNAL, QTimer, QHelpEvent, QAction,
QMenu, QIcon, pyqtSignal, QUrl,
QDialog, QSystemTrayIcon, QApplication, QKeySequence)
from calibre import prints
from calibre.constants import __appname__, isosx
from calibre.ptempfile import PersistentTemporaryFile
from calibre.utils.config import prefs, dynamic
from calibre.utils.ipc.server import Server
from calibre.library.database2 import LibraryDatabase2
from calibre.customize.ui import interface_actions
from calibre.gui2 import error_dialog, GetMetadata, open_local_file, \
from calibre.gui2 import error_dialog, GetMetadata, open_url, \
gprefs, max_available_height, config, info_dialog, Dispatcher, \
question_dialog
from calibre.gui2.cover_flow import CoverFlowMixin
@ -567,37 +566,7 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, EmailMixin, # {{{
QApplication.instance().quit()
def donate(self, *args):
BUTTON = '''
<form action="https://www.paypal.com/cgi-bin/webscr" method="post">
<input type="hidden" name="cmd" value="_s-xclick" />
<input type="hidden" name="hosted_button_id" value="3029467" />
<input type="image" src="https://www.paypal.com/en_US/i/btn/btn_donateCC_LG.gif" border="0" name="submit" alt="Donate to support calibre development" />
<img alt="" border="0" src="https://www.paypal.com/en_US/i/scr/pixel.gif" width="1" height="1" />
</form>
'''
MSG = _('is the result of the efforts of many volunteers from all '
'over the world. If you find it useful, please consider '
'donating to support its development. Your donation helps '
'keep calibre development going.')
HTML = u'''
<html>
<head>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
<title>Donate to support calibre</title>
</head>
<body style="background:white">
<div><a href="http://calibre-ebook.com"><img style="border:0px"
src="file://%s" alt="calibre" /></a></div>
<p>Calibre %s</p>
%s
</body>
</html>
'''%(P('content_server/calibre_banner.png').replace(os.sep, '/'), MSG, BUTTON)
pt = PersistentTemporaryFile('_donate.htm')
pt.write(HTML.encode('utf-8'))
pt.close()
open_local_file(pt.name)
open_url(QUrl('http://calibre-ebook.com/donate'))
def confirm_quit(self):
if self.job_manager.has_jobs():


@ -317,7 +317,7 @@ class CoverView(QGraphicsView, ImageDropMixin):
ImageDropMixin.__init__(self)
def get_pixmap(self):
for item in self.scene().items():
for item in self.scene.items():
if hasattr(item, 'pixmap'):
return item.pixmap()
@ -342,6 +342,7 @@ class FontFamilyModel(QAbstractListModel):
self.families = list(qt_families.intersection(set(self.families)))
self.families.sort()
self.families[:0] = [_('None')]
self.font = QFont('sansserif')
def rowCount(self, *args):
return len(self.families)
@ -354,10 +355,11 @@ class FontFamilyModel(QAbstractListModel):
return NONE
if role == Qt.DisplayRole:
return QVariant(family)
if False and role == Qt.FontRole:
# Causes a Qt crash with some fonts
# so disabled.
return QVariant(QFont(family))
if role == Qt.FontRole:
# If a user chooses some non standard font as the interface font,
# rendering some font names causes Qt to crash, so return what is
# hopefully a "safe" font
return QVariant(self.font)
return NONE
def index_of(self, family):


@ -549,6 +549,8 @@ Calibre has several keyboard shortcuts to save you time and mouse movement. Thes
- Download metadata and shortcuts
* - :kbd:`Ctrl+R`
- Restart calibre
* - :kbd:`Ctrl+Shift+R`
- Restart calibre in debug mode
* - :kbd:`Shift+Ctrl+E`
- Add empty books to calibre
* - :kbd:`Ctrl+Q`


@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python2
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
@ -8,114 +8,71 @@ __docformat__ = 'restructuredtext en'
Plugin to make the commit command automatically close bugs when the commit
message contains `Fix #number` or `Implement #number`. Also updates the commit
message with the summary of the closed bug. It also sets the `--fixes` metadata
appropriately. Currently only works with a Trac bug repository with the XMLRPC
plugin enabled.
To use copy this file into `~/.bazaar/plugins` and add the following to branch.conf
in the working tree you want to use it with::
trac_reponame_url = <url>
trac_reponame_username = <username>
trac_reponame_password = <password>
appropriately.
'''
import os, re, xmlrpclib, subprocess
from bzrlib.builtins import cmd_commit as _cmd_commit, tree_files
from bzrlib import branch
import re, urllib, importlib, sys
from bzrlib.builtins import cmd_commit as _cmd_commit
import bzrlib
from lxml import html
SENDMAIL = ('/home/kovid/work/kde', 'pgp_mail')
class cmd_commit(_cmd_commit):
@classmethod
def trac_url(self, username, password, url):
return url.replace('//', '//%s:%s@'%(username, password))+'/login/xmlrpc'
def get_trac_summary(self, bug, url):
print 'Getting bug summary for bug #%s'%bug,
server = xmlrpclib.ServerProxy(url)
attributes = server.ticket.get(int(bug))[-1]
print attributes['summary']
return attributes['summary']
def expand_bug(self, msg, nick, config, bug_tracker, type='trac'):
prefix = '%s_%s_'%(type, nick)
username = config.get_user_option(prefix+'username')
password = config.get_user_option(prefix+'password')
close_bug = config.get_user_option(prefix+'pattern')
if close_bug is None:
def expand_bug(self, msg):
close_bug = r'(Fix|Implement|Fixes|Fixed|Implemented)\s+#(\d+)'
close_bug_pat = re.compile(close_bug, re.IGNORECASE)
match = close_bug_pat.search(msg)
if not match:
return msg, None, None, None
return msg, None, None
action, bug = match.group(1), match.group(2)
summary = ''
if type == 'trac':
url = self.trac_url(username, password, bug_tracker)
summary = self.get_trac_summary(bug, url)
raw = urllib.urlopen('https://bugs.launchpad.net/calibre/+bug/' +
bug).read()
h1 = html.fromstring(raw).xpath('//h1[@id="edit-title"]')[0]
summary = html.tostring(h1, method='text', encoding=unicode).strip()
print 'Working on bug:', summary
if summary:
msg = msg.replace('#%s'%bug, '#%s (%s)'%(bug, summary))
msg = msg.replace('Fixesed', 'Fixed')
return msg, bug, url, action
def get_bugtracker(self, basedir, type='trac'):
config = os.path.join(basedir, '.bzr', 'branch', 'branch.conf')
bugtracker, nick = None, None
if os.access(config, os.R_OK):
for line in open(config).readlines():
match = re.search(r'%s_(\S+)_url\s*=\s*(\S+)'%type, line)
if match:
nick, bugtracker = match.group(1), match.group(2)
break
return nick, bugtracker
def expand_message(self, msg, tree):
nick, bugtracker = self.get_bugtracker(tree.basedir, type='trac')
if not bugtracker:
return msg
config = branch.Branch.open(tree.basedir).get_config()
msg, bug, url, action = self.expand_bug(msg, nick, config, bugtracker)
return msg, bug, url, action, nick, config
return msg, bug, action
def run(self, message=None, file=None, verbose=False, selected_list=None,
unchanged=False, strict=False, local=False, fixes=None,
author=None, show_diff=False, exclude=None):
nick = config = bug = action = None
bug = action = None
if message:
try:
message, bug, url, action, nick, config = \
self.expand_message(message, tree_files(selected_list)[0])
except ValueError:
pass
message, bug, action = self.expand_bug(message)
if nick and bug and not fixes:
fixes = [nick+':'+bug]
if bug and not fixes:
fixes = ['lp:'+bug]
ret = _cmd_commit.run(self, message=message, file=file, verbose=verbose,
selected_list=selected_list, unchanged=unchanged,
strict=strict, local=local, fixes=fixes,
author=author, show_diff=show_diff, exclude=exclude)
if message and bug and action and nick and config:
self.close_bug(bug, action, url, config)
if message and bug and action:
self.close_bug(bug, action)
return ret
def close_bug(self, bug, action, url, config):
def close_bug(self, bug, action):
print 'Closing bug #%s'% bug
#nick = config.get_nickname()
suffix = config.get_user_option('bug_close_comment')
if suffix is None:
suffix = 'The fix will be in the next release.'
suffix = ('The fix will be in the next release. '
'calibre is usually released every Friday.')
action = action+'ed'
msg = '%s in branch %s. %s'%(action, 'lp:calibre', suffix)
msg = msg.replace('Fixesed', 'Fixed')
server = xmlrpclib.ServerProxy(url)
server.ticket.update(int(bug), msg,
{'status':'closed', 'resolution':'fixed'},
True)
subprocess.Popen('/home/kovid/work/kde/mail.py -f --delay 10'.split())
msg += '\n\n status fixreleased'
sys.path.insert(0, SENDMAIL[0])
sendmail = importlib.import_module(SENDMAIL[1])
to = bug+'@bugs.launchpad.net'
sendmail.sendmail(msg, to, 'Fixed in lp:calibre')
bzrlib.commands.register_command(cmd_commit)
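An illustrative walk-through of the rewritten plugin; the bug number and summary are made up:

# bzr commit -m "Fix #801234"
#
# expand_bug() matches the message against
# (Fix|Implement|Fixes|Fixed|Implemented)\s+#(\d+), scrapes the bug
# title from https://bugs.launchpad.net/calibre/+bug/801234 and expands
# the message to
#   Fix #801234 (Conversion fails on this sample EPUB)
# while --fixes is set to lp:801234. After the commit, close_bug()
# mails 801234@bugs.launchpad.net with the "next release" note and a
# 'status fixreleased' footer, which closes the report through
# Launchpad's email interface.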


@ -8,15 +8,18 @@ import re, htmlentitydefs
_ascii_pat = None
def clean_ascii_chars(txt, charlist=None):
'''
Remove ASCII control chars: 0 to 8 and 11, 12, 14-31 by default
This is all control chars except \\t,\\n and \\r
r'''
Remove ASCII control chars.
This is all control chars except \t, \n and \r
'''
if not txt:
return ''
global _ascii_pat
if _ascii_pat is None:
chars = list(range(8)) + [0x0B, 0x0C] + list(range(0x0E, 0x1F))
chars = set(xrange(32))
chars.add(127)
for x in (9, 10, 13):
chars.remove(x)
_ascii_pat = re.compile(u'|'.join(map(unichr, chars)))
if charlist is None:
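# The rewritten default set now covers every C0 control character plus
# DEL (127), keeping only tab, newline and carriage return. Expected
# behaviour, assuming the function goes on to substitute matches with
# an empty string as before:
#   clean_ascii_chars(u'foo\x00bar\x1fbaz\r\n')  ->  u'foobarbaz\r\n'
#   clean_ascii_chars(u'x\x7fy')                 ->  u'xy'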


@ -584,6 +584,12 @@ def educateQuotes(str):
# <p>He said, "'Quoted' words in a larger quote."</p>
str = re.sub(r""""'(?=\w)""", """&#8220;&#8216;""", str)
str = re.sub(r"""'"(?=\w)""", """&#8216;&#8220;""", str)
str = re.sub(r'''""(?=\w)''', """&#8220;&#8220;""", str)
str = re.sub(r"""''(?=\w)""", """&#8216;&#8216;""", str)
str = re.sub(r'''\"\'''', """&#8221;&#8217;""", str)
str = re.sub(r'''\'\"''', """&#8217;&#8221;""", str)
str = re.sub(r'''""''', """&#8221;&#8221;""", str)
str = re.sub(r"""''""", """&#8217;&#8217;""", str)
# Special case for decade abbreviations (the '80s):
str = re.sub(r"""\b'(?=\d{2}s)""", r"""&#8217;""", str)


@ -251,12 +251,12 @@ class WMF(object):
img.load(bmp)
return img.export('png')
def wmf_unwrap(wmf_data):
def wmf_unwrap(wmf_data, verbose=0):
'''
Return the largest embedded raster image in the WMF.
The returned data is in PNG format.
'''
w = WMF()
w = WMF(verbose=verbose)
w(wmf_data)
if not w.has_raster_image:
raise ValueError('No raster image found in the WMF')
@ -266,4 +266,5 @@ if __name__ == '__main__':
wmf = WMF(verbose=4)
wmf(open(sys.argv[-1], 'rb'))
open('/t/test.bmp', 'wb').write(wmf.bitmaps[0])
open('/t/test.png', 'wb').write(wmf.to_png())
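A hedged usage sketch for the new verbose parameter of wmf_unwrap(); the import path and file names are assumptions, not taken from this diff:

from calibre.utils.wmf.parse import wmf_unwrap  # assumed module path

with open('drawing.wmf', 'rb') as f:
    png_data = wmf_unwrap(f.read(), verbose=2)  # verbose is now forwarded to WMF()
with open('drawing.png', 'wb') as f:
    f.write(png_data)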


@ -28,6 +28,7 @@ class Article(object):
pass
if not isinstance(self._title, unicode):
self._title = self._title.decode('utf-8', 'replace')
self._title = clean_ascii_chars(self._title)
self.url = url
self.author = author
if author and not isinstance(author, unicode):
@ -75,7 +76,7 @@ class Article(object):
t = t.decode('utf-8', 'replace')
return t
def fset(self, val):
self._title = val
self._title = clean_ascii_chars(val)
return property(fget=fget, fset=fset)