Merge from trunk

This commit is contained in:
Charles Haley 2011-03-22 12:57:34 +00:00
commit c5b2de9663
33 changed files with 796 additions and 516 deletions

79
recipes/caijing.recipe Normal file
View File

@ -0,0 +1,79 @@
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
class Caijing(BasicNewsRecipe):
    '''Fetch the current issue of Caijing Magazine (magazine.caijing.com.cn).'''

    title = 'Caijing Magazine'
    __author__ = 'Eric Chen'
    description = '''Bi-weekly Finance and Economics Review. Founded in 1998, the fortnightly CAIJING
    Magazine has firmly established itself as a news authority and leading voice for
    business and financial issues in China.
    CAIJING Magazine closely tracks the most important aspects of China's economic reforms,
    developments and policy changes, as well as major events in the capital markets. It also
    offers a broad international perspective through first-hand reporting on international
    political and economic issues.
    CAIJING Magazine is China's most widely read business and finance magazine, with a
    circulation of 225,000 per issue. It boasts top-level readers from government, business
    and academic circles. '''
    language = 'zh'
    category = 'news, China'
    encoding = 'UTF-8'
    timefmt = ' [%a, %d %b, %Y]'
    needs_subscription = True

    remove_tags = [
        dict(attrs={'class':['topad', 'nav', 'searchbox', 'connav',
            'mbx', 'bianji', 'bianji bj', 'lnewlist', 'rdtj', 'loadComment',
            'conr', 'bottom', 'bottomcopyr', 'emaildy', 'rcom', 'allcontent']}),
        dict(name=['script', 'noscript', 'style'])]
    no_stylesheets = True
    remove_javascript = True

    # Filled in by parse_index(); get_cover_url() reads the cover attribute.
    current_issue_url = ""
    current_issue_cover = ""

    # Article/cover URLs embed a YYYY-MM-DD date; article URLs also carry a
    # nine digit article id.  Raw strings avoid invalid escape warnings.
    _date_pat = re.compile(r'\d{4}-\d{2}-\d{2}')

    def get_browser(self):
        # BUG FIX: get_browser is an instance method, so the original unbound
        # call BasicNewsRecipe.get_browser() raised a TypeError -- pass self.
        br = BasicNewsRecipe.get_browser(self)
        if self.username is not None and self.password is not None:
            br.open('http://service.caijing.com.cn/usermanage/login')
            br.select_form(name='mainLoginForm')
            br['username'] = self.username
            br['password'] = self.password
            br.submit()
        return br

    def parse_index(self):
        # The index page of the current year links to the latest issue.
        soup0 = self.index_to_soup('http://magazine.caijing.com.cn/2011/cjindex2011/')
        div = soup0.find('div', attrs={'class':'fmcon'})
        link = div.find('a', href=True)
        # Store on the instance so the declared class-level default is
        # actually updated (the original assigned a shadowing local).
        self.current_issue_url = link['href']

        soup = self.index_to_soup(self.current_issue_url)
        # The cover image is the only <img> whose src contains a date.
        for div_cover in soup.findAll('img', {'src': re.compile('.')}):
            if self._date_pat.search(div_cover['src']):
                self.current_issue_cover = div_cover['src']

        feeds = []
        for section in soup.findAll('div', attrs={'class':'cebd'}):
            section_title = self.tag_to_string(
                section.find('div', attrs={'class':'ceti'}))
            articles = []
            for post in section.findAll('a', href=True):
                m = self._date_pat.search(post['href'])
                if m is None:
                    continue
                date = m.group(0)
                article_id = re.search(r'\d{9}', post['href']).group(0)
                # Rewrite the article link into the single-page print view.
                url = re.sub(r'\d.*', 'templates/inc/chargecontent2.jsp?id=',
                        post['href'])
                url = url + article_id + '&time=' + date + '&cl=106&page=all'
                articles.append({'title': self.tag_to_string(post),
                    'url': url, 'date': date})
            if articles:
                feeds.append((section_title, articles))
        return feeds

    def get_cover_url(self):
        # Set as a side effect of parse_index(), which runs first.
        return self.current_issue_cover

View File

@ -8,13 +8,13 @@ __description__ = 'Providing context and clarity on national and international n
'''csmonitor.com''' '''csmonitor.com'''
import re import re
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class ChristianScienceMonitor(BasicNewsRecipe): class ChristianScienceMonitor(BasicNewsRecipe):
author = 'Kovid Goyal, Sujata Raman and Lorenzo Vigentini' __author__ = 'Kovid Goyal'
description = 'Providing context and clarity on national and international news, peoples and cultures' description = 'Providing context and clarity on national and international news, peoples and cultures'
cover_url = 'http://www.csmonitor.com/extension/csm_base/design/csm_design/images/csmlogo_179x46.gif' cover_url = 'http://www.csmonitor.com/extension/csm_base/design/csm_design/images/csmlogo_179x46.gif'
@ -34,6 +34,49 @@ class ChristianScienceMonitor(BasicNewsRecipe):
remove_javascript = True remove_javascript = True
no_stylesheets = True no_stylesheets = True
def append_page(self, soup, appendtag, position):
    # Multi-page articles: recursively fetch each 'Next' page and splice
    # its article text into `appendtag` at index `position`.
    nav = soup.find('div',attrs={'class':'navigation'})
    if nav:
        pager = nav.findAll('a')
        for part in pager:
            # NOTE(review): `'Next' in part` relies on BeautifulSoup Tag
            # containment matching the anchor's text content -- confirm.
            if 'Next' in part:
                # Pull the href out of the rendered anchor with a regex.
                nexturl = ('http://www.csmonitor.com' +
                    re.findall(r'href="(.*?)"', str(part))[0])
                soup2 = self.index_to_soup(nexturl)
                texttag = soup2.find('div',
                    attrs={'class': re.compile('list-article-.*')})
                # Drop the teaser description and the repeated headline
                # from the continuation page before merging it in.
                trash_c = soup2.findAll(attrs={'class': 'list-description'})
                trash_h = soup2.h1
                for tc in trash_c: tc.extract()
                trash_h.extract()
                # Recurse first so deeper pages land after this page's text.
                newpos = len(texttag.contents)
                self.append_page(soup2, texttag, newpos)
                texttag.extract()
                appendtag.insert(position, texttag)
def preprocess_html(self, soup):
    '''
    Prefer the single-page print version of an article when the page
    links to one; otherwise stitch multi-page articles together and
    strip navigation and inline-styled cruft.
    '''
    PRINT_RE = re.compile(r'/layout/set/print/content/view/print/[0-9]*')
    # BUG FIX: the original wrapped findall() in a try/except that
    # swallowed the error, leaving print_found unbound so the following
    # `if print_found:` raised NameError.  findall() does not need the
    # guard at all.
    print_found = PRINT_RE.findall(str(soup))
    if print_found:
        print_url = 'http://www.csmonitor.com' + print_found[0]
        print_soup = self.index_to_soup(print_url)
    else:
        # No print version: merge continuation pages, then remove
        # navigation blocks, inline-styled tags and bylines.
        self.append_page(soup, soup.body, 3)
        for tag in soup.findAll(attrs={'class': re.compile('navigation.*')}):
            tag.extract()
        for tag in soup.findAll(attrs={'style': re.compile('.*')}):
            tag.extract()
        for tag in soup.findAll(attrs={'class': 'sByline'}):
            tag.extract()
        print_soup = soup
    return print_soup
preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
[ [
@ -43,7 +86,6 @@ class ChristianScienceMonitor(BasicNewsRecipe):
(r'Full HTML version of this story which may include photos, graphics, and related links.*</body>', (r'Full HTML version of this story which may include photos, graphics, and related links.*</body>',
lambda match : '</body>'), lambda match : '</body>'),
]] ]]
extra_css = ''' extra_css = '''
h1{ color:#000000;font-family: Georgia,Times,"Times New Roman",serif; font-size: large} h1{ color:#000000;font-family: Georgia,Times,"Times New Roman",serif; font-size: large}
.sub{ color:#000000;font-family: Georgia,Times,"Times New Roman",serif; font-size: small;} .sub{ color:#000000;font-family: Georgia,Times,"Times New Roman",serif; font-size: small;}
@ -56,10 +98,9 @@ class ChristianScienceMonitor(BasicNewsRecipe):
#main{font-family:Arial,Tahoma,Verdana,Helvetica,sans-serif ; font-size: small; } #main{font-family:Arial,Tahoma,Verdana,Helvetica,sans-serif ; font-size: small; }
#photo-details{ font-family:Arial,Helvetica,sans-serif ; color:#999999; font-size: x-small;} #photo-details{ font-family:Arial,Helvetica,sans-serif ; color:#999999; font-size: x-small;}
span.name{color:#205B87;font-family: Georgia,Times,"Times New Roman",serif; font-size: x-small} span.name{color:#205B87;font-family: Georgia,Times,"Times New Roman",serif; font-size: x-small}
p#dateline{color:#444444 ; font-family:Arial,Helvetica,sans-serif ; font-style:italic;} p#dateline{color:#444444 ; font-family:Arial,Helvetica,sans-serif ; font-style:italic;} '''
'''
feeds = [ feeds = [(u'Top Stories', u'http://rss.csmonitor.com/feeds/top'),
(u'Top Stories' , u'http://rss.csmonitor.com/feeds/top'),
(u'World' , u'http://rss.csmonitor.com/feeds/world'), (u'World' , u'http://rss.csmonitor.com/feeds/world'),
(u'USA' , u'http://rss.csmonitor.com/feeds/usa'), (u'USA' , u'http://rss.csmonitor.com/feeds/usa'),
(u'Commentary' , u'http://rss.csmonitor.com/feeds/commentary'), (u'Commentary' , u'http://rss.csmonitor.com/feeds/commentary'),
@ -74,9 +115,7 @@ class ChristianScienceMonitor(BasicNewsRecipe):
(u'Home Forum' , u'http://rss.csmonitor.com/feeds/homeforum') (u'Home Forum' , u'http://rss.csmonitor.com/feeds/homeforum')
] ]
keep_only_tags = [ keep_only_tags = [dict(name='div', attrs={'id':'mainColumn'}), ]
dict(name='div', attrs={'id':'mainColumn'}),
]
remove_tags = [ remove_tags = [
dict(name='div', attrs={'id':['story-tools','videoPlayer','storyRelatedBottom','enlarge-photo','photo-paginate']}), dict(name='div', attrs={'id':['story-tools','videoPlayer','storyRelatedBottom','enlarge-photo','photo-paginate']}),
@ -86,7 +125,10 @@ class ChristianScienceMonitor(BasicNewsRecipe):
'hide', 'podBrdr']}), 'hide', 'podBrdr']}),
dict(name='ul', attrs={'class':[ 'centerliststories']}) , dict(name='ul', attrs={'class':[ 'centerliststories']}) ,
dict(name='form', attrs={'id':[ 'commentform']}) , dict(name='form', attrs={'id':[ 'commentform']}) ,
dict(name='div', attrs={'class': ['ui-comments']})
] ]
remove_tags_after = [ dict(name='div', attrs={'class':[ 'ad csmAd']})] remove_tags_after = [ dict(name='div', attrs={'class':[ 'ad csmAd']}),
dict(name='div', attrs={'class': [re.compile('navigation.*')]}),
dict(name='div', attrs={'style': [re.compile('.*')]})
]

View File

@ -1,4 +1,3 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
@ -6,55 +5,48 @@ class WashingtonPost(BasicNewsRecipe):
title = 'Washington Post' title = 'Washington Post'
description = 'US political news' description = 'US political news'
__author__ = 'Kovid Goyal and Sujata Raman' __author__ = 'Kovid Goyal'
use_embedded_content = False use_embedded_content = False
max_articles_per_feed = 20 max_articles_per_feed = 20
language = 'en' language = 'en'
encoding = 'utf-8'
remove_javascript = True remove_javascript = True
no_stylesheets = True no_stylesheets = True
extra_css = ''' feeds = [
#articleCopyright { font-family:Arial,helvetica,sans-serif ; font-weight:bold ; font-size:x-small ;} ('Politics', 'http://www.washingtonpost.com/rss/politics'),
p { font-family:"Times New Roman",times,serif ; font-weight:normal ; font-size:small ;} ('Nation', 'http://www.washingtonpost.com/rss/national'),
body{font-family:arial,helvetica,sans-serif} ('World', 'http://www.washingtonpost.com/rss/world'),
''' ('Business', 'http://www.washingtonpost.com/rss/business'),
('Lifestyle', 'http://www.washingtonpost.com/rss/lifestyle'),
feeds = [ ('Today\'s Highlights', 'http://www.washingtonpost.com/wp-dyn/rss/linkset/2005/03/24/LI2005032400102.xml'), ('Sports', 'http://www.washingtonpost.com/rss/sports'),
('Politics', 'http://www.washingtonpost.com/wp-dyn/rss/politics/index.xml'), ('Redskins', 'http://www.washingtonpost.com/rss/sports/redskins'),
('Nation', 'http://www.washingtonpost.com/wp-dyn/rss/nation/index.xml'), ('Opinions', 'http://www.washingtonpost.com/rss/opinions'),
('World', 'http://www.washingtonpost.com/wp-dyn/rss/world/index.xml'), ('Entertainment', 'http://www.washingtonpost.com/rss/entertainment'),
('Business', 'http://www.washingtonpost.com/wp-dyn/rss/business/index.xml'), ('Local', 'http://www.washingtonpost.com/rss/local'),
('Technology', 'http://www.washingtonpost.com/wp-dyn/rss/technology/index.xml'), ('Investigations',
('Health', 'http://www.washingtonpost.com/wp-dyn/rss/health/index.xml'), 'http://www.washingtonpost.com/rss/investigations'),
('Education', 'http://www.washingtonpost.com/wp-dyn/rss/education/index.xml'),
('Style',
'http://www.washingtonpost.com/wp-dyn/rss/print/style/index.xml'),
('NFL Sports',
'http://www.washingtonpost.com/wp-dyn/rss/sports/index/nfl/index.xml'),
('Redskins', 'http://www.washingtonpost.com/wp-dyn/rss/sports/redskins/index.xml'),
('Editorials', 'http://www.washingtonpost.com/wp-dyn/rss/linkset/2005/05/30/LI2005053000331.xml'),
] ]
remove_tags = [{'id':['pfmnav', 'ArticleCommentsWrapper']}] remove_tags = [
{'class':lambda x: x and 'article-toolbar' in x},
{'class':lambda x: x and 'quick-comments' in x},
{'class':lambda x: x and 'tweet' in x},
{'class':lambda x: x and 'article-related' in x},
{'class':lambda x: x and 'hidden' in x.split()},
{'class':lambda x: x and 'also-read' in x.split()},
{'class':lambda x: x and 'partners-content' in x.split()},
{'class':['module share', 'module ads', 'comment-vars', 'hidden',
'share-icons-wrap', 'comments']},
{'id':['right-rail']},
]
keep_only_tags = dict(id=['content', 'article'])
def get_article_url(self, article):
return article.get('guid', article.get('link', None))
def print_version(self, url): def print_version(self, url):
return url.rpartition('.')[0] + '_pf.html' url = url.rpartition('?')[0]
return url.replace('_story.html', '_singlePage.html')
def postprocess_html(self, soup, first):
for div in soup.findAll(name='div', style=re.compile('margin')):
div['style'] = ''
return soup
def preprocess_html(self, soup):
for tag in soup.findAll('font'):
if tag.has_key('size'):
if tag['size'] == '+2':
if tag.b:
return soup
return None

View File

@ -18,6 +18,6 @@ def recipe_title_callback(raw):
return eval(raw.decode('utf-8')) return eval(raw.decode('utf-8'))
vipy.session.add_content_browser('.r', ',r', 'Recipe', vipy.session.add_content_browser('.r', ',r', 'Recipe',
vipy.session.glob_based_iterator(os.path.join(project_dir, 'resources', 'recipes', '*.recipe')), vipy.session.glob_based_iterator(os.path.join(project_dir, 'recipes', '*.recipe')),
vipy.session.regexp_based_matcher(r'title\s*=\s*(?P<title>.+)', 'title', recipe_title_callback)) vipy.session.regexp_based_matcher(r'title\s*=\s*(?P<title>.+)', 'title', recipe_title_callback))
EOFPY EOFPY

View File

@ -14,9 +14,9 @@ from setup.build_environment import HOST, PROJECT
BASE_RSYNC = ['rsync', '-avz', '--delete'] BASE_RSYNC = ['rsync', '-avz', '--delete']
EXCLUDES = [] EXCLUDES = []
for x in [ for x in [
'src/calibre/plugins', 'src/calibre/manual', 'src/calibre/trac', 'src/calibre/plugins', 'src/calibre/manual', 'src/calibre/trac', 'recipes',
'.bzr', '.build', '.svn', 'build', 'dist', 'imgsrc', '*.pyc', '*.pyo', '*.swp', '.bzr', '.build', '.svn', 'build', 'dist', 'imgsrc', '*.pyc', '*.pyo', '*.swp',
'*.swo']: '*.swo', 'format_docs']:
EXCLUDES.extend(['--exclude', x]) EXCLUDES.extend(['--exclude', x])
SAFE_EXCLUDES = ['"%s"'%x if '*' in x else x for x in EXCLUDES] SAFE_EXCLUDES = ['"%s"'%x if '*' in x else x for x in EXCLUDES]
@ -138,7 +138,7 @@ class VMInstaller(Command):
self.vm = self.VM self.vm = self.VM
if not self.vmware_started(): if not self.vmware_started():
self.start_vmware() self.start_vmware()
subprocess.call(['chmod', '-R', '+r', 'resources/recipes']) subprocess.call(['chmod', '-R', '+r', 'recipes'])
self.start_vm() self.start_vm()
self.download_installer() self.download_installer()
if not self.dont_shutdown: if not self.dont_shutdown:

View File

@ -6,7 +6,7 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import sys, os, shutil, glob, py_compile, subprocess, re import sys, os, shutil, glob, py_compile, subprocess, re, zipfile, time
from setup import Command, modules, functions, basenames, __version__, \ from setup import Command, modules, functions, basenames, __version__, \
__appname__ __appname__
@ -40,6 +40,13 @@ DESCRIPTIONS = {
'calibre-smtp' : 'Command line interface for sending books via email', 'calibre-smtp' : 'Command line interface for sending books via email',
} }
def walk(dir):
    '''Yield the full path of every file found under *dir*, depth-first,
    in the order produced by os.walk.'''
    for dirpath, _dirnames, filenames in os.walk(dir):
        for fname in filenames:
            yield os.path.join(dirpath, fname)
class Win32Freeze(Command, WixMixIn): class Win32Freeze(Command, WixMixIn):
description = 'Free windows calibre installation' description = 'Free windows calibre installation'
@ -63,12 +70,15 @@ class Win32Freeze(Command, WixMixIn):
self.rc_template = self.j(self.d(self.a(__file__)), 'template.rc') self.rc_template = self.j(self.d(self.a(__file__)), 'template.rc')
self.py_ver = ''.join(map(str, sys.version_info[:2])) self.py_ver = ''.join(map(str, sys.version_info[:2]))
self.lib_dir = self.j(self.base, 'Lib') self.lib_dir = self.j(self.base, 'Lib')
self.pydlib = self.j(self.base, 'pydlib')
self.pylib = self.j(self.base, 'pylib.zip')
self.initbase() self.initbase()
self.build_launchers() self.build_launchers()
self.freeze() self.freeze()
self.embed_manifests() self.embed_manifests()
self.install_site_py() self.install_site_py()
self.archive_lib_dir()
self.create_installer() self.create_installer()
def initbase(self): def initbase(self):
@ -356,4 +366,108 @@ class Win32Freeze(Command, WixMixIn):
dest, lib] dest, lib]
self.run_builder(cmd) self.run_builder(cmd)
def archive_lib_dir(self):
    # Pack all pure-python code into pylib.zip (zipimport is faster than
    # many small files); anything that is not zip-safe (native binaries,
    # packages with .pyd files) is copied to the pydlib directory instead.
    self.info('Putting all python code into a zip file for performance')
    if os.path.exists(self.pydlib):
        shutil.rmtree(self.pydlib)
    os.makedirs(self.pydlib)
    # One timestamp shared by every zip entry, for reproducibility.
    self.zf_timestamp = time.localtime(time.time())[:6]
    self.zf_names = set()
    with zipfile.ZipFile(self.pylib, 'w', zipfile.ZIP_STORED) as zf:
        # Everything in Lib except site-packages goes straight in.
        for x in os.listdir(self.lib_dir):
            if x == 'site-packages':
                continue
            self.add_to_zipfile(zf, x, self.lib_dir)
        sp = self.j(self.lib_dir, 'site-packages')
        handled = set(['site.pyo'])
        # PIL and pywin32 ship native code: copy their .pth files and the
        # directories those reference into pydlib rather than the zip.
        for pth in ('PIL.pth', 'pywin32.pth'):
            handled.add(pth)
            shutil.copyfile(self.j(sp, pth), self.j(self.pydlib, pth))
            for d in self.get_pth_dirs(self.j(sp, pth)):
                shutil.copytree(d, self.j(self.pydlib, self.b(d)), True)
                handled.add(self.b(d))
        # setuptools-installed eggs: zip the zip-safe ones, copy the rest.
        handled.add('easy-install.pth')
        for d in self.get_pth_dirs(self.j(sp, 'easy-install.pth')):
            handled.add(self.b(d))
            zip_safe = self.is_zip_safe(d)
            for x in os.listdir(d):
                if x == 'EGG-INFO':
                    continue
                if zip_safe:
                    self.add_to_zipfile(zf, x, d)
                else:
                    absp = self.j(d, x)
                    dest = self.j(self.pydlib, x)
                    if os.path.isdir(absp):
                        shutil.copytree(absp, dest, True)
                    else:
                        shutil.copy2(absp, dest)
        # Whatever else is left in site-packages.
        for x in os.listdir(sp):
            if x in handled or x.endswith('.egg-info'):
                continue
            absp = self.j(sp, x)
            if os.path.isdir(absp):
                if not os.listdir(absp):
                    continue
                if self.is_zip_safe(absp):
                    self.add_to_zipfile(zf, x, sp)
                else:
                    shutil.copytree(absp, self.j(self.pydlib, x), True)
            else:
                # Loose native extensions go to pydlib, python files to the zip.
                if x.endswith('.pyd'):
                    shutil.copy2(absp, self.j(self.pydlib, x))
                else:
                    self.add_to_zipfile(zf, x, sp)
    # Lib/ has been fully absorbed into pylib.zip + pydlib; remove it.
    shutil.rmtree(self.lib_dir)
def is_zip_safe(self, path):
    # A directory tree may live inside pylib.zip only if it ships no
    # native binaries; those must stay on disk (in pydlib) to be loadable.
    unsafe_exts = ('.pyd', '.dll', '.exe')
    for fpath in walk(path):
        if os.path.splitext(fpath)[1].lower() in unsafe_exts:
            return False
    return True
def get_pth_dirs(self, pth):
    '''
    Yield the existing directories referenced by the .pth file ``pth``
    (paths are resolved relative to the .pth file's own directory).
    Blank lines, ``#`` comments, executable ``import`` lines and the
    special ``win32\\lib`` entry are skipped.
    '''
    base = os.path.dirname(pth)
    # BUG FIX: the file handle was opened via open(pth).readlines() and
    # never closed; iterate it inside a with block instead.
    with open(pth) as f:
        for raw in f:
            line = raw.strip()
            if not line or line.startswith('#') or line.startswith('import'):
                continue
            if line == 'win32\\lib':
                continue
            candidate = self.j(base, line)
            if os.path.exists(candidate):
                yield candidate
def add_to_zipfile(self, zf, name, base, exclude=frozenset()):
    # Recursively add base/name to the zip file `zf`, using '/' separated
    # archive names.  Refuses native binaries and duplicate entries.
    abspath = self.j(base, name)
    name = name.replace(os.sep, '/')
    if name in self.zf_names:
        raise ValueError('Already added %r to zipfile [%r]'%(name, abspath))
    # All entries share self.zf_timestamp for a reproducible archive.
    zinfo = zipfile.ZipInfo(filename=name, date_time=self.zf_timestamp)
    if os.path.isdir(abspath):
        # Skip empty directories entirely.
        if not os.listdir(abspath):
            return
        # 0700/0600 are Python 2 octal literals; shifted into the upper
        # 16 bits where zip stores the unix permission bits.
        zinfo.external_attr = 0700 << 16
        zf.writestr(zinfo, '')
        for x in os.listdir(abspath):
            if x not in exclude:
                # os.sep is normalized back to '/' on the recursive call.
                self.add_to_zipfile(zf, name + os.sep + x, base)
    else:
        ext = os.path.splitext(name)[1].lower()
        if ext in ('.pyd', '.dll', '.exe'):
            raise ValueError('Cannot add %r to zipfile'%abspath)
        zinfo.external_attr = 0600 << 16
        # NOTE(review): only python source/bytecode is actually written;
        # other extensions are merely recorded in zf_names -- confirm
        # this skip is intentional.
        if ext in ('.py', '.pyc', '.pyo'):
            with open(abspath, 'rb') as f:
                zf.writestr(zinfo, f.read())
    self.zf_names.add(name)

View File

@ -96,7 +96,7 @@ def main():
abs__file__() abs__file__()
addsitedir(os.path.join(sys.app_dir, 'Lib', 'site-packages')) addsitedir(os.path.join(sys.app_dir, 'pydlib'))
add_calibre_vars() add_calibre_vars()

View File

@ -198,7 +198,7 @@ void initialize_interpreter(wchar_t *outr, wchar_t *errr,
buf[strlen(buf)-1] = '\0'; buf[strlen(buf)-1] = '\0';
_snprintf_s(python_home, MAX_PATH, _TRUNCATE, "%s", buf); _snprintf_s(python_home, MAX_PATH, _TRUNCATE, "%s", buf);
_snprintf_s(path, 3*MAX_PATH, _TRUNCATE, "%s\\DLLs;%s\\Lib;%s\\Lib\\site-packages", _snprintf_s(path, 3*MAX_PATH, _TRUNCATE, "%s\\pylib.zip;%s\\pydlib;%s\\DLLs",
buf, buf, buf); buf, buf, buf);
free(buf); free(buf);

View File

@ -154,9 +154,9 @@
<CustomAction Id="LaunchApplication" BinaryKey="WixCA" <CustomAction Id="LaunchApplication" BinaryKey="WixCA"
DllEntry="WixShellExec" Impersonate="yes"/> DllEntry="WixShellExec" Impersonate="yes"/>
<InstallUISequence> <!--<InstallUISequence>
<FileCost Suppress="yes" /> <FileCost Suppress="yes" />
</InstallUISequence> </InstallUISequence>-->
</Product> </Product>
</Wix> </Wix>

View File

@ -58,12 +58,14 @@ class ANDROID(USBMS):
0x413c : { 0xb007 : [0x0100, 0x0224]}, 0x413c : { 0xb007 : [0x0100, 0x0224]},
# LG # LG
0x1004 : { 0x61cc : [0x100], 0x61ce : [0x100] }, 0x1004 : { 0x61cc : [0x100], 0x61ce : [0x100], 0x618e : [0x226] },
# Archos # Archos
0x0e79 : { 0x0e79 : {
0x1400 : [0x0222, 0x0216], 0x1400 : [0x0222, 0x0216],
0x1408 : [0x0222, 0x0216], 0x1408 : [0x0222, 0x0216],
0x1411 : [0x216],
0x1417 : [0x0216],
0x1419 : [0x0216], 0x1419 : [0x0216],
0x1420 : [0x0216], 0x1420 : [0x0216],
0x1422 : [0x0216] 0x1422 : [0x0216]
@ -91,14 +93,14 @@ class ANDROID(USBMS):
VENDOR_NAME = ['HTC', 'MOTOROLA', 'GOOGLE_', 'ANDROID', 'ACER', VENDOR_NAME = ['HTC', 'MOTOROLA', 'GOOGLE_', 'ANDROID', 'ACER',
'GT-I5700', 'SAMSUNG', 'DELL', 'LINUX', 'GOOGLE', 'ARCHOS', 'GT-I5700', 'SAMSUNG', 'DELL', 'LINUX', 'GOOGLE', 'ARCHOS',
'TELECHIP', 'HUAWEI', 'T-MOBILE', 'SEMC'] 'TELECHIP', 'HUAWEI', 'T-MOBILE', 'SEMC', 'LGE']
WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE', WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE',
'__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897', '__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897',
'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID',
'SCH-I500_CARD', 'SPH-D700_CARD', 'MB810', 'GT-P1000', 'DESIRE', 'SCH-I500_CARD', 'SPH-D700_CARD', 'MB810', 'GT-P1000', 'DESIRE',
'SGH-T849', '_MB300', 'A70S', 'S_ANDROID', 'A101IT', 'A70H', 'SGH-T849', '_MB300', 'A70S', 'S_ANDROID', 'A101IT', 'A70H',
'IDEOS_TABLET', 'MYTOUCH_4G', 'UMS_COMPOSITE', 'SCH-I800_CARD', 'IDEOS_TABLET', 'MYTOUCH_4G', 'UMS_COMPOSITE', 'SCH-I800_CARD',
'7', 'A956', 'A955'] '7', 'A956', 'A955', 'A43', 'ANDROID_PLATFORM']
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897', WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
'A70S', 'A101IT', '7'] 'A70S', 'A101IT', '7']

View File

@ -19,7 +19,7 @@ class BLACKBERRY(USBMS):
VENDOR_ID = [0x0fca] VENDOR_ID = [0x0fca]
PRODUCT_ID = [0x8004, 0x0004] PRODUCT_ID = [0x8004, 0x0004]
BCD = [0x0200, 0x0107, 0x0210, 0x0201, 0x0211] BCD = [0x0200, 0x0107, 0x0210, 0x0201, 0x0211, 0x0220]
VENDOR_NAME = 'RIM' VENDOR_NAME = 'RIM'
WINDOWS_MAIN_MEM = 'BLACKBERRY_SD' WINDOWS_MAIN_MEM = 'BLACKBERRY_SD'

View File

@ -49,6 +49,8 @@ HEURISTIC_OPTIONS = ['markup_chapter_headings',
'dehyphenate', 'renumber_headings', 'dehyphenate', 'renumber_headings',
'replace_scene_breaks'] 'replace_scene_breaks']
DEFAULT_TRUE_OPTIONS = HEURISTIC_OPTIONS + ['remove_fake_margins']
def print_help(parser, log): def print_help(parser, log):
help = parser.format_help().encode(preferred_encoding, 'replace') help = parser.format_help().encode(preferred_encoding, 'replace')
log(help) log(help)
@ -90,7 +92,7 @@ def option_recommendation_to_cli_option(add_option, rec):
if opt.long_switch == 'verbose': if opt.long_switch == 'verbose':
attrs['action'] = 'count' attrs['action'] = 'count'
attrs.pop('type', '') attrs.pop('type', '')
if opt.name in HEURISTIC_OPTIONS and rec.recommended_value is True: if opt.name in DEFAULT_TRUE_OPTIONS and rec.recommended_value is True:
switches = ['--disable-'+opt.long_switch] switches = ['--disable-'+opt.long_switch]
add_option(Option(*switches, **attrs)) add_option(Option(*switches, **attrs))
@ -162,6 +164,7 @@ def add_pipeline_options(parser, plumber):
'chapter', 'chapter_mark', 'chapter', 'chapter_mark',
'prefer_metadata_cover', 'remove_first_image', 'prefer_metadata_cover', 'remove_first_image',
'insert_metadata', 'page_breaks_before', 'insert_metadata', 'page_breaks_before',
'remove_fake_margins',
] ]
), ),

View File

@ -304,6 +304,17 @@ OptionRecommendation(name='page_breaks_before',
'before the specified elements.') 'before the specified elements.')
), ),
OptionRecommendation(name='remove_fake_margins',
recommended_value=True, level=OptionRecommendation.LOW,
help=_('Some documents specify page margins by '
'specifying a left and right margin on each individual '
'paragraph. calibre will try to detect and remove these '
'margins. Sometimes, this can cause the removal of '
'margins that should not have been removed. In this '
'case you can disable the removal.')
),
OptionRecommendation(name='margin_top', OptionRecommendation(name='margin_top',
recommended_value=5.0, level=OptionRecommendation.LOW, recommended_value=5.0, level=OptionRecommendation.LOW,
help=_('Set the top margin in pts. Default is %default. ' help=_('Set the top margin in pts. Default is %default. '
@ -988,9 +999,13 @@ OptionRecommendation(name='sr3_replace',
page_break_on_body=self.output_plugin.file_type in ('mobi', page_break_on_body=self.output_plugin.file_type in ('mobi',
'lit')) 'lit'))
flattener(self.oeb, self.opts) flattener(self.oeb, self.opts)
self.opts.insert_blank_line = oibl self.opts.insert_blank_line = oibl
self.opts.remove_paragraph_spacing = orps self.opts.remove_paragraph_spacing = orps
from calibre.ebooks.oeb.transforms.page_margin import RemoveFakeMargins
RemoveFakeMargins()(self.oeb, self.log, self.opts)
pr(0.9) pr(0.9)
self.flush() self.flush()

View File

@ -28,11 +28,12 @@ class Worker(Thread): # {{{
Get book details from amazons book page in a separate thread Get book details from amazons book page in a separate thread
''' '''
def __init__(self, url, result_queue, browser, log, timeout=20): def __init__(self, url, result_queue, browser, log, relevance, plugin, timeout=20):
Thread.__init__(self) Thread.__init__(self)
self.daemon = True self.daemon = True
self.url, self.result_queue = url, result_queue self.url, self.result_queue = url, result_queue
self.log, self.timeout = log, timeout self.log, self.timeout = log, timeout
self.relevance, self.plugin = relevance, plugin
self.browser = browser.clone_browser() self.browser = browser.clone_browser()
self.cover_url = self.amazon_id = self.isbn = None self.cover_url = self.amazon_id = self.isbn = None
@ -161,6 +162,15 @@ class Worker(Thread): # {{{
else: else:
self.log.warning('Failed to find product description for url: %r'%self.url) self.log.warning('Failed to find product description for url: %r'%self.url)
mi.source_relevance = self.relevance
if self.amazon_id:
if self.isbn:
self.plugin.cache_isbn_to_identifier(self.isbn, self.amazon_id)
if self.cover_url:
self.cache_identifier_to_cover_url(self.amazon_id,
self.cover_url)
self.result_queue.put(mi) self.result_queue.put(mi)
def parse_asin(self, root): def parse_asin(self, root):
@ -321,6 +331,20 @@ class Amazon(Source):
# }}} # }}}
def get_cached_cover_url(self, identifiers):
    # Resolve an ASIN from the identifiers dict ('amazon' preferred,
    # then 'asin', then via the cached isbn->asin mapping) and return
    # the cached cover URL for it, or None.
    asin = identifiers.get('amazon', identifiers.get('asin', None))
    if asin is None:
        isbn = identifiers.get('isbn', None)
        if isbn is not None:
            asin = self.cached_isbn_to_identifier(isbn)
    if asin is None:
        return None
    return self.cached_identifier_to_cover_url(asin)
def identify(self, log, result_queue, abort, title=None, authors=None, # {{{ def identify(self, log, result_queue, abort, title=None, authors=None, # {{{
identifiers={}, timeout=30): identifiers={}, timeout=30):
''' '''
@ -396,7 +420,8 @@ class Amazon(Source):
log.error('No matches found with query: %r'%query) log.error('No matches found with query: %r'%query)
return return
workers = [Worker(url, result_queue, br, log) for url in matches] workers = [Worker(url, result_queue, br, log, i, self) for i, url in
enumerate(matches)]
for w in workers: for w in workers:
w.start() w.start()
@ -414,14 +439,6 @@ class Amazon(Source):
if not a_worker_is_alive: if not a_worker_is_alive:
break break
for w in workers:
if w.amazon_id:
if w.isbn:
self.cache_isbn_to_identifier(w.isbn, w.amazon_id)
if w.cover_url:
self.cache_identifier_to_cover_url(w.amazon_id,
w.cover_url)
return None return None
# }}} # }}}

View File

@ -21,6 +21,21 @@ def create_log(ostream=None):
log.outputs = [FileStream(ostream)] log.outputs = [FileStream(ostream)]
return log return log
# Leading articles/prepositions stripped during title normalization.
words = ("the", "a", "an", "of", "and")

prefix_pat = re.compile(r'^(%s)\s+'%("|".join(words)))
trailing_paren_pat = re.compile(r'\(.*\)$')
whitespace_pat = re.compile(r'\s+')


def cleanup_title(s):
    '''Normalize a book title for fuzzy comparison: lowercase it, drop a
    leading article, drop a trailing parenthetical (series/edition info)
    and collapse runs of whitespace.'''
    if not s:
        s = _('Unknown')
    cleaned = whitespace_pat.sub(' ',
            trailing_paren_pat.sub('',
                prefix_pat.sub(' ', s.strip().lower())))
    return cleaned.strip()
class Source(Plugin): class Source(Plugin):
type = _('Metadata source') type = _('Metadata source')
@ -128,10 +143,91 @@ class Source(Plugin):
gr.append(job) gr.append(job)
return [g for g in groups if g] return [g for g in groups if g]
def test_fields(self, mi):
    '''
    Return the first field from self.touched_fields that is null on the
    mi object
    '''
    for field in self.touched_fields:
        if field.startswith('identifier:'):
            # 'identifier:isbn' style fields are checked via has_identifier
            id_type = field.partition(':')[-1]
            if not mi.has_identifier(id_type):
                return 'identifier: ' + id_type
        elif mi.is_null(field):
            return field
# }}} # }}}
# Metadata API {{{ # Metadata API {{{
def get_cached_cover_url(self, identifiers):
    '''
    Return cached cover URL for the book identified by
    the identifiers dict or None if no such URL exists.

    Base implementation has no cache; plugins override this.
    '''
    return None
def compare_identify_results(self, x, y, title=None, authors=None,
        identifiers={}):
    '''
    Method used to sort the results from a call to identify by relevance.
    Uses the actual query and various heuristics to rank results.
    Re-implement in your plugin if this generic algorithm is not suitable.
    Note that this method assumes x and y have a source_relevance
    attribute.

    one < two iff one is more relevant than two
    '''
    # First, guarantee that if the query specifies an ISBN, the result with
    # the same isbn is the most relevant
    def isbn_test(mi):
        return mi.isbn and mi.isbn == identifiers.get('isbn', None)

    def boolcmp(a, b):
        # cmp()-style helper: -1 when only a holds, +1 when only b holds.
        return -1 if a and not b else 1 if not a and b else 0

    x_has_isbn, y_has_isbn = isbn_test(x), isbn_test(y)
    result = boolcmp(x_has_isbn, y_has_isbn)
    if result != 0:
        return result

    # Now prefer results that have complete metadata over those that don't
    x_has_all_fields = self.test_fields(x) is None
    y_has_all_fields = self.test_fields(y) is None
    result = boolcmp(x_has_all_fields, y_has_all_fields)
    if result != 0:
        return result

    # Now prefer results whose title matches the search query
    if title:
        x_title = cleanup_title(x.title)
        y_title = cleanup_title(y.title)
        t = cleanup_title(title)
        x_has_title, y_has_title = x_title == t, y_title == t
        result = boolcmp(x_has_title, y_has_title)
        if result != 0:
            return result

    # Now prefer results with the longer comments, within 10%
    cx = len(x.comments.strip() if x.comments else '')
    cy = len(y.comments.strip() if y.comments else '')
    # t is 10% of the mean comment length (integer division under py2);
    # length differences within t are treated as a tie.
    t = (cx + cy) / 20
    result = cy - cx
    if result != 0 and abs(cx - cy) > t:
        return result

    # Now prefer results with cached cover URLs
    x_has_cover = self.get_cached_cover_url(x.identifiers) is not None
    y_has_cover = self.get_cached_cover_url(y.identifiers) is not None
    result = boolcmp(x_has_cover, y_has_cover)
    if result != 0:
        return result

    # Now use the relevance reported by the remote search engine
    return x.source_relevance - y.source_relevance
def identify(self, log, result_queue, abort, title=None, authors=None, def identify(self, log, result_queue, abort, title=None, authors=None,
identifiers={}, timeout=5): identifiers={}, timeout=5):
''' '''
@ -147,6 +243,15 @@ class Source(Plugin):
the same ISBN/special identifier does not need to get the cover URL the same ISBN/special identifier does not need to get the cover URL
again. Use the caching API for this. again. Use the caching API for this.
Every Metadata object put into result_queue by this method must have a
`source_relevance` attribute that is an integer indicating the order in
which the results were returned by the metadata source for this query.
This integer will be used by :meth:`compare_identify_results`. If the
order is unimportant, set it to zero for every result.
Make sure that any cover/isbn mapping information is cached before the
Metadata object is put into result_queue.
:param log: A log object, use it to output debugging information/errors :param log: A log object, use it to output debugging information/errors
:param result_queue: A result Queue, results should be put into it. :param result_queue: A result Queue, results should be put into it.
Each result is a Metadata object Each result is a Metadata object

View File

@ -190,14 +190,15 @@ class GoogleBooks(Source):
return raw and len(raw) > 17000 and raw[1:4] != 'PNG' return raw and len(raw) > 17000 and raw[1:4] != 'PNG'
def get_all_details(self, br, log, entries, abort, result_queue, timeout): def get_all_details(self, br, log, entries, abort, result_queue, timeout):
for i in entries: for relevance, i in enumerate(entries):
try: try:
ans = to_metadata(br, log, i, timeout) ans = to_metadata(br, log, i, timeout)
if isinstance(ans, Metadata): if isinstance(ans, Metadata):
result_queue.put(ans) ans.source_relevance = relevance
for isbn in getattr(ans, 'all_isbns', []): for isbn in getattr(ans, 'all_isbns', []):
self.cache_isbn_to_identifier(isbn, self.cache_isbn_to_identifier(isbn,
ans.identifiers['google']) ans.identifiers['google'])
result_queue.put(ans)
except: except:
log.exception( log.exception(
'Failed to get metadata for identify entry:', 'Failed to get metadata for identify entry:',

View File

@ -46,15 +46,6 @@ def authors_test(authors):
return test return test
def _test_fields(touched_fields, mi):
for key in touched_fields:
if key.startswith('identifier:'):
key = key.partition(':')[-1]
if not mi.has_identifier(key):
return 'identifier: ' + key
elif mi.is_null(key):
return key
def test_identify_plugin(name, tests): def test_identify_plugin(name, tests):
''' '''
@ -120,11 +111,10 @@ def test_identify_plugin(name, tests):
prints('Log saved to', lf) prints('Log saved to', lf)
raise SystemExit(1) raise SystemExit(1)
good = [x for x in possibles if _test_fields(plugin.touched_fields, x) is good = [x for x in possibles if plugin.test_fields(x) is
None] None]
if not good: if not good:
prints('Failed to find', _test_fields(plugin.touched_fields, prints('Failed to find', plugin.test_fields(possibles[0]))
possibles[0]))
raise SystemExit(1) raise SystemExit(1)

View File

@ -0,0 +1,153 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from collections import Counter
from calibre.ebooks.oeb.base import OEB_STYLES, barename, XPath
class RemoveFakeMargins(object):

    '''
    Remove left and right margins from paragraph/divs if the same margin is
    specified on almost all the elements at that level.

    Must be called only after CSS flattening, so that every element's margins
    are expressed through single-class selectors in one stylesheet.
    '''

    def __call__(self, oeb, log, opts):
        # Entry point for the pipeline. No-op unless the user enabled the
        # remove_fake_margins conversion option.
        if not opts.remove_fake_margins:
            return
        self.oeb, self.log, self.opts = oeb, log, opts
        stylesheet = None
        self.levels = {}        # level key ('tag_depth') -> list of elements
        self.stats = {}         # level key + '_left'/'_right' -> Counter of margins
        self.selector_map = {}  # CSS selector text -> CSSStyleDeclaration

        # CSS flattening produces a single stylesheet; use the first one found.
        for item in self.oeb.manifest:
            if item.media_type.lower() in OEB_STYLES:
                stylesheet = item
                break
        if stylesheet is None:
            return

        self.log('Removing fake margins...')

        stylesheet = stylesheet.data

        from cssutils.css import CSSRule
        for rule in stylesheet.cssRules.rulesOfType(CSSRule.STYLE_RULE):
            self.selector_map[rule.selectorList.selectorText] = rule.style

        self.find_levels()

        for level in self.levels:
            self.process_level(level)

    def get_margins(self, elem):
        # Return (left margin, right margin, style) for elem, looked up via
        # its class-based CSS rule. Returns ('', '', None) when no rule
        # applies.
        # NOTE(review): assumes a single class per element; a multi-valued
        # class attribute ('a b') will never match a '.a' selector here.
        cls = elem.get('class', None)
        if cls:
            style = self.selector_map.get('.'+cls, None)
            if style:
                return style.marginLeft, style.marginRight, style
        return '', '', None

    def process_level(self, level):
        # Gather margin statistics for all elements at this level and strip
        # the dominant margin if analyze_stats() says it is near-universal.
        elems = self.levels[level]
        self.stats[level+'_left'] = Counter()
        self.stats[level+'_right'] = Counter()

        for elem in elems:
            lm, rm = self.get_margins(elem)[:2]
            self.stats[level+'_left'][lm] += 1
            self.stats[level+'_right'][rm] += 1

        self.log.debug(level, ' left margin stats:', self.stats[level+'_left'])
        self.log.debug(level, ' right margin stats:', self.stats[level+'_right'])

        remove_left = self.analyze_stats(self.stats[level+'_left'])
        remove_right = self.analyze_stats(self.stats[level+'_right'])

        if remove_left:
            mcl = self.stats[level+'_left'].most_common(1)[0][0]
            self.log('Removing level %s left margin of:'%level, mcl)

        if remove_right:
            mcr = self.stats[level+'_right'].most_common(1)[0][0]
            self.log('Removing level %s right margin of:'%level, mcr)

        if remove_left or remove_right:
            for elem in elems:
                lm, rm, style = self.get_margins(elem)
                # style is None only when both margins are '', and '' can
                # never equal the (non-empty, non-zero) most common margin,
                # so removeProperty() below is never called on None.
                if remove_left and lm == mcl:
                    style.removeProperty('margin-left')
                if remove_right and rm == mcr:
                    style.removeProperty('margin-right')

    def find_levels(self):
        # Group every <p>/<div> in the spine by (tag name, nesting depth
        # below <body>), then discard groups too small or too shallow to be
        # meaningful for margin statistics.

        def level_of(elem, body):
            # Depth of elem below body (direct child == 1).
            ans = 1
            while elem.getparent() is not body:
                ans += 1
                elem = elem.getparent()
            return ans

        paras = XPath('descendant::h:p|descendant::h:div')

        for item in self.oeb.spine:
            body = XPath('//h:body')(item.data)
            if not body:
                continue
            body = body[0]
            for p in paras(body):
                level = level_of(p, body)
                level = '%s_%d'%(barename(p.tag), level)
                if level not in self.levels:
                    self.levels[level] = []
                self.levels[level].append(p)

        remove = set()
        for k, v in self.levels.iteritems():
            num = len(v)
            self.log.debug('Found %d items of level:'%num, k)
            level = int(k.split('_')[-1])
            tag = k.split('_')[0]
            if tag == 'p' and num < 25:
                # Too few paragraphs for reliable statistics
                remove.add(k)
            if tag == 'div':
                if level > 2 and num < 25:
                    remove.add(k)
                elif level < 3:
                    # Check each level < 3 element and only keep those
                    # that have many child paras
                    for elem in list(v):
                        children = len(paras(elem))
                        if children < 5:
                            v.remove(elem)

        for k in remove:
            self.levels.pop(k)
            self.log.debug('Ignoring level', k)

    def analyze_stats(self, stats):
        # Decide whether the margin for a level should be removed.
        # Returns True iff more than 95% of elements at the level share the
        # same non-empty, non-zero margin.
        # (Removed an unreachable branch from the original: most_common(1)
        # returns at most one entry, so checking len() > 1 was dead code;
        # multiple distinct margins are handled by the 95% test below.)
        if not stats:
            return False
        most_common, most_common_count = stats.most_common(1)[0]
        if not most_common or most_common == '0':
            # The dominant margin is empty or zero: nothing worth removing
            return False
        total = sum(stats.values())
        # True if greater than 95% of elements have the same margin
        return most_common_count/total > 0.95

View File

@ -22,6 +22,7 @@ border_style_map = {
'dot-dot-dash': 'dotted', 'dot-dot-dash': 'dotted',
'outset': 'outset', 'outset': 'outset',
'tripple': 'double', 'tripple': 'double',
'triple': 'double',
'thick-thin-small': 'solid', 'thick-thin-small': 'solid',
'thin-thick-small': 'solid', 'thin-thick-small': 'solid',
'thin-thick-thin-small': 'solid', 'thin-thick-thin-small': 'solid',

View File

@ -121,97 +121,108 @@ class Textile(object):
btag = ('bq', 'bc', 'notextile', 'pre', 'h[1-6]', 'fn\d+', 'p') btag = ('bq', 'bc', 'notextile', 'pre', 'h[1-6]', 'fn\d+', 'p')
btag_lite = ('bq', 'bc', 'p') btag_lite = ('bq', 'bc', 'p')
glyph_defaults = ( macro_defaults = [
('mac_cent', '&#162;'), (re.compile(r'{(c\||\|c)}'), r'&#162;'), # cent
('mac_pound', '&#163;'), (re.compile(r'{(L-|-L)}'), r'&#163;'), # pound
('mac_yen', '&#165;'), (re.compile(r'{(Y=|=Y)}'), r'&#165;'), # yen
('mac_quarter', '&#188;'), (re.compile(r'{\(c\)}'), r'&#169;'), # copyright
('mac_half', '&#189;'), (re.compile(r'{\(r\)}'), r'&#174;'), # registered
('mac_three-quarter', '&#190;'), (re.compile(r'{(\+_|_\+)}'), r'&#177;'), # plus-minus
('mac_cA-grave', '&#192;'), (re.compile(r'{1/4}'), r'&#188;'), # quarter
('mac_cA-acute', '&#193;'), (re.compile(r'{1/2}'), r'&#189;'), # half
('mac_cA-circumflex', '&#194;'), (re.compile(r'{3/4}'), r'&#190;'), # three-quarter
('mac_cA-tilde', '&#195;'), (re.compile(r'{(A`|`A)}'), r'&#192;'), # A-acute
('mac_cA-diaeresis', '&#196;'), (re.compile(r'{(A\'|\'A)}'), r'&#193;'), # A-grave
('mac_cA-ring', '&#197;'), (re.compile(r'{(A\^|\^A)}'), r'&#194;'), # A-circumflex
('mac_cAE', '&#198;'), (re.compile(r'{(A~|~A)}'), r'&#195;'), # A-tilde
('mac_cC-cedilla', '&#199;'), (re.compile(r'{(A\"|\"A)}'), r'&#196;'), # A-diaeresis
('mac_cE-grave', '&#200;'), (re.compile(r'{(Ao|oA)}'), r'&#197;'), # A-ring
('mac_cE-acute', '&#201;'), (re.compile(r'{(AE)}'), r'&#198;'), # AE
('mac_cE-circumflex', '&#202;'), (re.compile(r'{(C,|,C)}'), r'&#199;'), # C-cedilla
('mac_cE-diaeresis', '&#203;'), (re.compile(r'{(E`|`E)}'), r'&#200;'), # E-acute
('mac_cI-grave', '&#204;'), (re.compile(r'{(E\'|\'E)}'), r'&#201;'), # E-grave
('mac_cI-acute', '&#205;'), (re.compile(r'{(E\^|\^E)}'), r'&#202;'), # E-circumflex
('mac_cI-circumflex', '&#206;'), (re.compile(r'{(E\"|\"E)}'), r'&#203;'), # E-diaeresis
('mac_cI-diaeresis', '&#207;'), (re.compile(r'{(I`|`I)}'), r'&#204;'), # I-acute
('mac_cEth', '&#208;'), (re.compile(r'{(I\'|\'I)}'), r'&#205;'), # I-grave
('mac_cN-tilde', '&#209;'), (re.compile(r'{(I\^|\^I)}'), r'&#206;'), # I-circumflex
('mac_cO-grave', '&#210;'), (re.compile(r'{(I\"|\"I)}'), r'&#207;'), # I-diaeresis
('mac_cO-acute', '&#211;'), (re.compile(r'{(D-|-D)}'), r'&#208;'), # ETH
('mac_cO-circumflex', '&#212;'), (re.compile(r'{(N~|~N)}'), r'&#209;'), # N-tilde
('mac_cO-tilde', '&#213;'), (re.compile(r'{(O`|`O)}'), r'&#210;'), # O-acute
('mac_cO-diaeresis', '&#214;'), (re.compile(r'{(O\'|\'O)}'), r'&#211;'), # O-grave
('mac_cO-stroke', '&#216;'), (re.compile(r'{(O\^|\^O)}'), r'&#212;'), # O-circumflex
('mac_cU-grave', '&#217;'), (re.compile(r'{(O~|~O)}'), r'&#213;'), # O-tilde
('mac_cU-acute', '&#218;'), (re.compile(r'{(O\"|\"O)}'), r'&#214;'), # O-diaeresis
('mac_cU-circumflex', '&#219;'), (re.compile(r'{x}'), r'&#215;'), # dimension
('mac_cU-diaeresis', '&#220;'), (re.compile(r'{(O\/|\/O)}'), r'&#216;'), # O-slash
('mac_cY-acute', '&#221;'), (re.compile(r'{(U`|`U)}'), r'&#217;'), # U-acute
('mac_sa-grave', '&#224;'), (re.compile(r'{(U\'|\'U)}'), r'&#218;'), # U-grave
('mac_sa-acute', '&#225;'), (re.compile(r'{(U\^|\^U)}'), r'&#219;'), # U-circumflex
('mac_sa-circumflex', '&#226;'), (re.compile(r'{(U\"|\"U)}'), r'&#220;'), # U-diaeresis
('mac_sa-tilde', '&#227;'), (re.compile(r'{(Y\'|\'Y)}'), r'&#221;'), # Y-grave
('mac_sa-diaeresis', '&#228;'), (re.compile(r'{sz}'), r'&szlig;'), # sharp-s
('mac_sa-ring', '&#229;'), (re.compile(r'{(a`|`a)}'), r'&#224;'), # a-grave
('mac_sae', '&#230;'), (re.compile(r'{(a\'|\'a)}'), r'&#225;'), # a-acute
('mac_sc-cedilla', '&#231;'), (re.compile(r'{(a\^|\^a)}'), r'&#226;'), # a-circumflex
('mac_se-grave', '&#232;'), (re.compile(r'{(a~|~a)}'), r'&#227;'), # a-tilde
('mac_se-acute', '&#233;'), (re.compile(r'{(a\"|\"a)}'), r'&#228;'), # a-diaeresis
('mac_se-circumflex', '&#234;'), (re.compile(r'{(ao|oa)}'), r'&#229;'), # a-ring
('mac_se-diaeresis', '&#235;'), (re.compile(r'{ae}'), r'&#230;'), # ae
('mac_si-grave', '&#236;'), (re.compile(r'{(c,|,c)}'), r'&#231;'), # c-cedilla
('mac_si-acute', '&#237;'), (re.compile(r'{(e`|`e)}'), r'&#232;'), # e-grave
('mac_si-circumflex', '&#238;'), (re.compile(r'{(e\'|\'e)}'), r'&#233;'), # e-acute
('mac_si-diaeresis', '&#239;'), (re.compile(r'{(e\^|\^e)}'), r'&#234;'), # e-circumflex
('mac_sn-tilde', '&#241;'), (re.compile(r'{(e\"|\"e)}'), r'&#235;'), # e-diaeresis
('mac_so-grave', '&#242;'), (re.compile(r'{(i`|`i)}'), r'&#236;'), # i-grave
('mac_so-acute', '&#243;'), (re.compile(r'{(i\'|\'i)}'), r'&#237;'), # i-acute
('mac_so-circumflex', '&#244;'), (re.compile(r'{(i\^|\^i)}'), r'&#238;'), # i-circumflex
('mac_so-tilde', '&#245;'), (re.compile(r'{(i\"|\"i)}'), r'&#239;'), # i-diaeresis
('mac_so-diaeresis', '&#246;'), (re.compile(r'{(d-|-d)}'), r'&#240;'), # eth
('mac_so-stroke', '&#248;'), (re.compile(r'{(n~|~n)}'), r'&#241;'), # n-tilde
('mac_su-grave', '&#249;'), (re.compile(r'{(o`|`o)}'), r'&#242;'), # o-grave
('mac_su-acute', '&#250;'), (re.compile(r'{(o\'|\'o)}'), r'&#243;'), # o-acute
('mac_su-circumflex', '&#251;'), (re.compile(r'{(o\^|\^o)}'), r'&#244;'), # o-circumflex
('mac_su-diaeresis', '&#252;'), (re.compile(r'{(o~|~o)}'), r'&#245;'), # o-tilde
('mac_sy-acute', '&#253;'), (re.compile(r'{(o\"|\"o)}'), r'&#246;'), # o-diaeresis
('mac_sy-diaeresis', '&#255;'), (re.compile(r'{(o\/|\/o)}'), r'&#248;'), # o-stroke
('mac_cOE', '&#338;'), (re.compile(r'{(u`|`u)}'), r'&#249;'), # u-grave
('mac_soe', '&#339;'), (re.compile(r'{(u\'|\'u)}'), r'&#250;'), # u-acute
('mac_bullet', '&#8226;'), (re.compile(r'{(u\^|\^u)}'), r'&#251;'), # u-circumflex
('mac_franc', '&#8355;'), (re.compile(r'{(u\"|\"u)}'), r'&#252;'), # u-diaeresis
('mac_lira', '&#8356;'), (re.compile(r'{(y\'|\'y)}'), r'&#253;'), # y-acute
('mac_rupee', '&#8360;'), (re.compile(r'{(y\"|\"y)}'), r'&#255;'), # y-diaeresis
('mac_euro', '&#8364;'), (re.compile(r'{OE}'), r'&#338;'), # OE
('mac_spade', '&#9824;'), (re.compile(r'{oe}'), r'&#339;'), # oe
('mac_club', '&#9827;'), (re.compile(r'{(S\^|\^S)}'), r'&Scaron;'), # Scaron
('mac_heart', '&#9829;'), (re.compile(r'{(s\^|\^s)}'), r'&scaron;'), # scaron
('mac_diamond', '&#9830;'), (re.compile(r'{\*}'), r'&#8226;'), # bullet
('txt_dimension', '&#215;'), (re.compile(r'{Fr}'), r'&#8355;'), # Franc
('txt_quote_single_open', '&#8216;'), (re.compile(r'{(L=|=L)}'), r'&#8356;'), # Lira
('txt_quote_single_close', '&#8217;'), (re.compile(r'{Rs}'), r'&#8360;'), # Rupee
('txt_quote_double_open', '&#8220;'), (re.compile(r'{(C=|=C)}'), r'&#8364;'), # euro
('txt_quote_double_close', '&#8221;'), (re.compile(r'{tm}'), r'&#8482;'), # trademark
('txt_apostrophe', '&#8217;'), (re.compile(r'{spades?}'), r'&#9824;'), # spade
('txt_prime', '&#8242;'), (re.compile(r'{clubs?}'), r'&#9827;'), # club
('txt_prime_double', '&#8243;'), (re.compile(r'{hearts?}'), r'&#9829;'), # heart
('txt_ellipsis', '&#8230;'), (re.compile(r'{diam(onds?|s)}'), r'&#9830;'), # diamond
('txt_emdash', '&#8212;'), ]
('txt_endash', '&#8211;'), glyph_defaults = [
('txt_trademark', '&#8482;'), (re.compile(r'(\d+\'?\"?)( ?)x( ?)(?=\d+)'), r'\1\2&#215;\3'), # dimension sign
('txt_registered', '&#174;'), (re.compile(r'(\d+)\'', re.I), r'\1&#8242;'), # prime
('txt_copyright', '&#169;'), (re.compile(r'(\d+)\"', re.I), r'\1&#8243;'), # prime-double
) (re.compile(r'\b([A-Z][A-Z0-9]{2,})\b(?:[(]([^)]*)[)])'), r'<acronym title="\2">\1</acronym>'), # 3+ uppercase acronym
(re.compile(r'\b([A-Z][A-Z\'\-]+[A-Z])(?=[\s.,\)>])'), r'<span class="caps">\1</span>'), # 3+ uppercase
(re.compile(r'\b(\s{0,1})?\.{3}'), r'\1&#8230;'), # ellipsis
(re.compile(r'^[\*_-]{3,}$', re.M), r'<hr />'), # <hr> scene-break
(re.compile(r'\b--\b'), r'&#8212;'), # em dash
(re.compile(r'(\s)--(\s)'), r'\1&#8212;\2'), # em dash
(re.compile(r'\s-(?:\s|$)'), r' &#8211; '), # en dash
(re.compile(r'\b( ?)[([]TM[])]', re.I), r'\1&#8482;'), # trademark
(re.compile(r'\b( ?)[([]R[])]', re.I), r'\1&#174;'), # registered
(re.compile(r'\b( ?)[([]C[])]', re.I), r'\1&#169;'), # copyright
]
def __init__(self, restricted=False, lite=False, noimage=False): def __init__(self, restricted=False, lite=False, noimage=False):
"""docstring for __init__""" """docstring for __init__"""
@ -673,211 +684,15 @@ class Textile(object):
# fix: hackish # fix: hackish
text = re.sub(r'"\Z', '\" ', text) text = re.sub(r'"\Z', '\" ', text)
glyph_search = (
re.compile(r'(\d+\'?\"?)( ?)x( ?)(?=\d+)'), # dimension sign
re.compile(r"(\w)\'(\w)"), # apostrophe's
re.compile(r'(\s)\'(\d+\w?)\b(?!\')'), # back in '88
re.compile(r'(\S)\'(?=\s|'+self.pnct+'|<|$)'), # single closing
re.compile(r'\'/'), # single opening
re.compile(r'(\")\"'), # double closing - following another
re.compile(r'(\S)\"(?=\s|'+self.pnct+'|<|$)'), # double closing
re.compile(r'"'), # double opening
re.compile(r'\b([A-Z][A-Z0-9]{2,})\b(?:[(]([^)]*)[)])'), # 3+ uppercase acronym
re.compile(r'\b([A-Z][A-Z\'\-]+[A-Z])(?=[\s.,\)>])'), # 3+ uppercase
re.compile(r'\b(\s{0,1})?\.{3}'), # ellipsis
re.compile(r'(\s?)--(\s?)'), # em dash
re.compile(r'\s-(?:\s|$)'), # en dash
re.compile(r'\b( ?)[([]TM[])]', re.I), # trademark
re.compile(r'\b( ?)[([]R[])]', re.I), # registered
re.compile(r'\b( ?)[([]C[])]', re.I) # copyright
)
glyph_replace = [x % dict(self.glyph_defaults) for x in (
r'\1\2%(txt_dimension)s\3', # dimension sign
r'\1%(txt_apostrophe)s\2', # apostrophe's
r'\1%(txt_apostrophe)s\2', # back in '88
r'\1%(txt_quote_single_close)s', # single closing
r'%(txt_quote_single_open)s', # single opening
r'\1%(txt_quote_double_close)s', # double closing - following another
r'\1%(txt_quote_double_close)s', # double closing
r'%(txt_quote_double_open)s', # double opening
r'<acronym title="\2">\1</acronym>', # 3+ uppercase acronym
r'<span class="caps">\1</span>', # 3+ uppercase
r'\1%(txt_ellipsis)s', # ellipsis
r'\1%(txt_emdash)s\2', # em dash
r' %(txt_endash)s ', # en dash
r'\1%(txt_trademark)s', # trademark
r'\1%(txt_registered)s', # registered
r'\1%(txt_copyright)s' # copyright
)]
if re.search(r'{.+?}', text):
glyph_search += (
re.compile(r'{(c\||\|c)}'), # cent
re.compile(r'{(L-|-L)}'), # pound
re.compile(r'{(Y=|=Y)}'), # yen
re.compile(r'{\(c\)}'), # copyright
re.compile(r'{\(r\)}'), # registered
re.compile(r'{1/4}'), # quarter
re.compile(r'{1/2}'), # half
re.compile(r'{3/4}'), # three-quarter
re.compile(r'{(A`|`A)}'), # 192;
re.compile(r'{(A\'|\'A)}'), # 193;
re.compile(r'{(A\^|\^A)}'), # 194;
re.compile(r'{(A~|~A)}'), # 195;
re.compile(r'{(A\"|\"A)}'), # 196;
re.compile(r'{(Ao|oA)}'), # 197;
re.compile(r'{(AE)}'), # 198;
re.compile(r'{(C,|,C)}'), # 199;
re.compile(r'{(E`|`E)}'), # 200;
re.compile(r'{(E\'|\'E)}'), # 201;
re.compile(r'{(E\^|\^E)}'), # 202;
re.compile(r'{(E\"|\"E)}'), # 203;
re.compile(r'{(I`|`I)}'), # 204;
re.compile(r'{(I\'|\'I)}'), # 205;
re.compile(r'{(I\^|\^I)}'), # 206;
re.compile(r'{(I\"|\"I)}'), # 207;
re.compile(r'{(D-|-D)}'), # 208;
re.compile(r'{(N~|~N)}'), # 209;
re.compile(r'{(O`|`O)}'), # 210;
re.compile(r'{(O\'|\'O)}'), # 211;
re.compile(r'{(O\^|\^O)}'), # 212;
re.compile(r'{(O~|~O)}'), # 213;
re.compile(r'{(O\"|\"O)}'), # 214;
re.compile(r'{(O\/|\/O)}'), # 215;
re.compile(r'{(U`|`U)}'), # 216;
re.compile(r'{(U\'|\'U)}'), # 217;
re.compile(r'{(U\^|\^U)}'), # 218;
re.compile(r'{(U\"|\"U)}'), # 219;
re.compile(r'{(Y\'|\'Y)}'), # 220;
re.compile(r'{(a`|`a)}'), # a-grace
re.compile(r'{(a\'|\'a)}'), # a-acute
re.compile(r'{(a\^|\^a)}'), # a-circumflex
re.compile(r'{(a~|~a)}'), # a-tilde
re.compile(r'{(a\"|\"a)}'), # a-diaeresis
re.compile(r'{(ao|oa)}'), # a-ring
re.compile(r'{ae}'), # ae
re.compile(r'{(c,|,c)}'), # c-cedilla
re.compile(r'{(e`|`e)}'), # e-grace
re.compile(r'{(e\'|\'e)}'), # e-acute
re.compile(r'{(e\^|\^e)}'), # e-circumflex
re.compile(r'{(e\"|\"e)}'), # e-diaeresis
re.compile(r'{(i`|`i)}'), # i-grace
re.compile(r'{(i\'|\'i)}'), # i-acute
re.compile(r'{(i\^|\^i)}'), # i-circumflex
re.compile(r'{(i\"|\"i)}'), # i-diaeresis
re.compile(r'{(n~|~n)}'), # n-tilde
re.compile(r'{(o`|`o)}'), # o-grace
re.compile(r'{(o\'|\'o)}'), # o-acute
re.compile(r'{(o\^|\^o)}'), # o-circumflex
re.compile(r'{(o~|~o)}'), # o-tilde
re.compile(r'{(o\"|\"o)}'), # o-diaeresis
re.compile(r'{(o\/|\/o)}'), # o-stroke
re.compile(r'{(u`|`u)}'), # u-grace
re.compile(r'{(u\'|\'u)}'), # u-acute
re.compile(r'{(u\^|\^u)}'), # u-circumflex
re.compile(r'{(u\"|\"u)}'), # u-diaeresis
re.compile(r'{(y\'|\'y)}'), # y-acute
re.compile(r'{(y\"|\"y)}'), # y-diaeresis
re.compile(r'{OE}'), # y-diaeresis
re.compile(r'{oe}'), # y-diaeresis
re.compile(r'{\*}'), # bullet
re.compile(r'{Fr}'), # Franc
re.compile(r'{(L=|=L)}'), # Lira
re.compile(r'{Rs}'), # Rupee
re.compile(r'{(C=|=C)}'), # euro
re.compile(r'{tm}'), # euro
re.compile(r'{spade}'), # spade
re.compile(r'{club}'), # club
re.compile(r'{heart}'), # heart
re.compile(r'{diamond}') # diamond
)
glyph_replace += [x % dict(self.glyph_defaults) for x in (
r'%(mac_cent)s', # cent
r'%(mac_pound)s', # pound
r'%(mac_yen)s', # yen
r'%(txt_copyright)s', # copyright
r'%(txt_registered)s', # registered
r'%(mac_quarter)s', # quarter
r'%(mac_half)s', # half
r'%(mac_three-quarter)s', # three-quarter
r'%(mac_cA-grave)s', # 192;
r'%(mac_cA-acute)s', # 193;
r'%(mac_cA-circumflex)s', # 194;
r'%(mac_cA-tilde)s', # 195;
r'%(mac_cA-diaeresis)s', # 196;
r'%(mac_cA-ring)s', # 197;
r'%(mac_cAE)s', # 198;
r'%(mac_cC-cedilla)s', # 199;
r'%(mac_cE-grave)s', # 200;
r'%(mac_cE-acute)s', # 201;
r'%(mac_cE-circumflex)s', # 202;
r'%(mac_cE-diaeresis)s', # 203;
r'%(mac_cI-grave)s', # 204;
r'%(mac_cI-acute)s', # 205;
r'%(mac_cI-circumflex)s', # 206;
r'%(mac_cI-diaeresis)s', # 207;
r'%(mac_cEth)s', # 208;
r'%(mac_cN-tilde)s', # 209;
r'%(mac_cO-grave)s', # 210;
r'%(mac_cO-acute)s', # 211;
r'%(mac_cO-circumflex)s', # 212;
r'%(mac_cO-tilde)s', # 213;
r'%(mac_cO-diaeresis)s', # 214;
r'%(mac_cO-stroke)s', # 216;
r'%(mac_cU-grave)s', # 217;
r'%(mac_cU-acute)s', # 218;
r'%(mac_cU-circumflex)s', # 219;
r'%(mac_cU-diaeresis)s', # 220;
r'%(mac_cY-acute)s', # 221;
r'%(mac_sa-grave)s', # 224;
r'%(mac_sa-acute)s', # 225;
r'%(mac_sa-circumflex)s', # 226;
r'%(mac_sa-tilde)s', # 227;
r'%(mac_sa-diaeresis)s', # 228;
r'%(mac_sa-ring)s', # 229;
r'%(mac_sae)s', # 230;
r'%(mac_sc-cedilla)s', # 231;
r'%(mac_se-grave)s', # 232;
r'%(mac_se-acute)s', # 233;
r'%(mac_se-circumflex)s', # 234;
r'%(mac_se-diaeresis)s', # 235;
r'%(mac_si-grave)s', # 236;
r'%(mac_si-acute)s', # 237;
r'%(mac_si-circumflex)s', # 238;
r'%(mac_si-diaeresis)s', # 239;
r'%(mac_sn-tilde)s', # 241;
r'%(mac_so-grave)s', # 242;
r'%(mac_so-acute)s', # 243;
r'%(mac_so-circumflex)s', # 244;
r'%(mac_so-tilde)s', # 245;
r'%(mac_so-diaeresis)s', # 246;
r'%(mac_so-stroke)s', # 248;
r'%(mac_su-grave)s', # 249;
r'%(mac_su-acute)s', # 250;
r'%(mac_su-circumflex)s', # 251;
r'%(mac_su-diaeresis)s', # 252;
r'%(mac_sy-acute)s', # 253;
r'%(mac_sy-diaeresis)s', # 255;
r'%(mac_cOE)s', # 338;
r'%(mac_soe)s', # 339;
r'%(mac_bullet)s', # bullet
r'%(mac_franc)s', # franc
r'%(mac_lira)s', # lira
r'%(mac_rupee)s', # rupee
r'%(mac_euro)s', # euro
r'%(txt_trademark)s', # trademark
r'%(mac_spade)s', # spade
r'%(mac_club)s', # club
r'%(mac_heart)s', # heart
r'%(mac_diamond)s' # diamond
)]
result = [] result = []
for line in re.compile(r'(<.*?>)', re.U).split(text): for line in re.compile(r'(<.*?>)', re.U).split(text):
if not re.search(r'<.*>', line): if not re.search(r'<.*>', line):
for s, r in zip(glyph_search, glyph_replace): rules = []
if re.search(r'{.+?}', line):
rules = self.macro_defaults + self.glyph_defaults
else:
rules = self.glyph_defaults
for s, r in rules:
line = s.sub(r, line) line = s.sub(r, line)
result.append(line) result.append(line)
return ''.join(result) return ''.join(result)
@ -927,7 +742,7 @@ class Textile(object):
return url return url
def shelve(self, text): def shelve(self, text):
id = str(uuid.uuid4()) id = str(uuid.uuid4()) + 'c'
self.shelf[id] = text self.shelf[id] = text
return id return id
@ -1049,7 +864,7 @@ class Textile(object):
for qtag in qtags: for qtag in qtags:
pattern = re.compile(r""" pattern = re.compile(r"""
(?:^|(?<=[\s>%(pnct)s])|\[|([\]}])) (?:^|(?<=[\s>%(pnct)s\(])|\[|([\]}]))
(%(qtag)s)(?!%(qtag)s) (%(qtag)s)(?!%(qtag)s)
(%(c)s) (%(c)s)
(?::(\S+))? (?::(\S+))?

View File

@ -165,6 +165,7 @@ class TXTInput(InputFormatPlugin):
elif options.formatting_type == 'textile': elif options.formatting_type == 'textile':
log.debug('Running text through textile conversion...') log.debug('Running text through textile conversion...')
html = convert_textile(txt) html = convert_textile(txt)
setattr(options, 'smarten_punctuation', True)
else: else:
log.debug('Running text through basic conversion...') log.debug('Running text through basic conversion...')
flow_size = getattr(options, 'flow_size', 0) flow_size = getattr(options, 'flow_size', 0)

View File

@ -25,8 +25,11 @@ class PreferencesAction(InterfaceAction):
self.gui.run_wizard) self.gui.run_wizard)
if not DEBUG: if not DEBUG:
pm.addSeparator() pm.addSeparator()
pm.addAction(QIcon(I('debug.png')), _('Restart in debug mode'), ac = pm.addAction(QIcon(I('debug.png')), _('Restart in debug mode'),
self.debug_restart) self.debug_restart)
ac.setShortcut('Ctrl+Shift+R')
self.gui.addAction(ac)
self.qaction.setMenu(pm) self.qaction.setMenu(pm)
self.preferences_menu = pm self.preferences_menu = pm
for x in (self.gui.preferences_action, self.qaction): for x in (self.gui.preferences_action, self.qaction):

View File

@ -21,7 +21,7 @@ class StructureDetectionWidget(Widget, Ui_Form):
def __init__(self, parent, get_option, get_help, db=None, book_id=None): def __init__(self, parent, get_option, get_help, db=None, book_id=None):
Widget.__init__(self, parent, Widget.__init__(self, parent,
['chapter', 'chapter_mark', ['chapter', 'chapter_mark',
'remove_first_image', 'remove_first_image', 'remove_fake_margins',
'insert_metadata', 'page_breaks_before'] 'insert_metadata', 'page_breaks_before']
) )
self.db, self.book_id = db, book_id self.db, self.book_id = db, book_id

View File

@ -48,10 +48,10 @@
</property> </property>
</widget> </widget>
</item> </item>
<item row="6" column="0" colspan="3"> <item row="7" column="0" colspan="3">
<widget class="XPathEdit" name="opt_page_breaks_before" native="true"/> <widget class="XPathEdit" name="opt_page_breaks_before" native="true"/>
</item> </item>
<item row="7" column="0" colspan="3"> <item row="8" column="0" colspan="3">
<spacer name="verticalSpacer"> <spacer name="verticalSpacer">
<property name="orientation"> <property name="orientation">
<enum>Qt::Vertical</enum> <enum>Qt::Vertical</enum>
@ -77,7 +77,7 @@
</property> </property>
</spacer> </spacer>
</item> </item>
<item row="4" column="0" colspan="3"> <item row="5" column="0" colspan="3">
<widget class="QLabel" name="label_2"> <widget class="QLabel" name="label_2">
<property name="text"> <property name="text">
<string>The header and footer removal options have been replaced by the Search &amp; Replace options. Click the Search &amp; Replace category in the bar to the left to use these options. Leave the replace field blank and enter your header/footer removal regexps into the search field.</string> <string>The header and footer removal options have been replaced by the Search &amp; Replace options. Click the Search &amp; Replace category in the bar to the left to use these options. Leave the replace field blank and enter your header/footer removal regexps into the search field.</string>
@ -87,6 +87,13 @@
</property> </property>
</widget> </widget>
</item> </item>
<item row="2" column="2">
<widget class="QCheckBox" name="opt_remove_fake_margins">
<property name="text">
<string>Remove &amp;fake margins</string>
</property>
</widget>
</item>
</layout> </layout>
</widget> </widget>
<customwidgets> <customwidgets>

View File

@ -16,8 +16,7 @@ from PyQt4.Qt import Qt, QTreeView, QApplication, pyqtSignal, QFont, QSize, \
QIcon, QPoint, QVBoxLayout, QHBoxLayout, QComboBox, QTimer,\ QIcon, QPoint, QVBoxLayout, QHBoxLayout, QComboBox, QTimer,\
QAbstractItemModel, QVariant, QModelIndex, QMenu, QFrame,\ QAbstractItemModel, QVariant, QModelIndex, QMenu, QFrame,\
QPushButton, QWidget, QItemDelegate, QString, QLabel, \ QPushButton, QWidget, QItemDelegate, QString, QLabel, \
QShortcut, QKeySequence, SIGNAL, QMimeData, QSizePolicy,\ QShortcut, QKeySequence, SIGNAL, QMimeData, QToolButton
QToolButton
from calibre.ebooks.metadata import title_sort from calibre.ebooks.metadata import title_sort
from calibre.gui2 import config, NONE, gprefs from calibre.gui2 import config, NONE, gprefs
@ -1051,12 +1050,12 @@ class TagsModel(QAbstractItemModel): # {{{
if (key == 'authors' and len(ids) >= 5): if (key == 'authors' and len(ids) >= 5):
if not confirm('<p>'+_('Changing the authors for several books can ' if not confirm('<p>'+_('Changing the authors for several books can '
'take a while. Are you sure?') 'take a while. Are you sure?')
+'</p>', 'tag_browser_drop_authors', self.parent()): +'</p>', 'tag_browser_drop_authors', self.tags_view):
return return
elif len(ids) > 15: elif len(ids) > 15:
if not confirm('<p>'+_('Changing the metadata for that many books ' if not confirm('<p>'+_('Changing the metadata for that many books '
'can take a while. Are you sure?') 'can take a while. Are you sure?')
+'</p>', 'tag_browser_many_changes', self.parent()): +'</p>', 'tag_browser_many_changes', self.tags_view):
return return
fm = self.db.metadata_for_field(key) fm = self.db.metadata_for_field(key)

View File

@ -12,18 +12,17 @@ __docformat__ = 'restructuredtext en'
import collections, os, sys, textwrap, time, gc import collections, os, sys, textwrap, time, gc
from Queue import Queue, Empty from Queue import Queue, Empty
from threading import Thread from threading import Thread
from PyQt4.Qt import Qt, SIGNAL, QTimer, QHelpEvent, QAction, \ from PyQt4.Qt import (Qt, SIGNAL, QTimer, QHelpEvent, QAction,
QMenu, QIcon, pyqtSignal, \ QMenu, QIcon, pyqtSignal, QUrl,
QDialog, QSystemTrayIcon, QApplication, QKeySequence QDialog, QSystemTrayIcon, QApplication, QKeySequence)
from calibre import prints from calibre import prints
from calibre.constants import __appname__, isosx from calibre.constants import __appname__, isosx
from calibre.ptempfile import PersistentTemporaryFile
from calibre.utils.config import prefs, dynamic from calibre.utils.config import prefs, dynamic
from calibre.utils.ipc.server import Server from calibre.utils.ipc.server import Server
from calibre.library.database2 import LibraryDatabase2 from calibre.library.database2 import LibraryDatabase2
from calibre.customize.ui import interface_actions from calibre.customize.ui import interface_actions
from calibre.gui2 import error_dialog, GetMetadata, open_local_file, \ from calibre.gui2 import error_dialog, GetMetadata, open_url, \
gprefs, max_available_height, config, info_dialog, Dispatcher, \ gprefs, max_available_height, config, info_dialog, Dispatcher, \
question_dialog question_dialog
from calibre.gui2.cover_flow import CoverFlowMixin from calibre.gui2.cover_flow import CoverFlowMixin
@ -567,37 +566,7 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, EmailMixin, # {{{
QApplication.instance().quit() QApplication.instance().quit()
def donate(self, *args): def donate(self, *args):
BUTTON = ''' open_url(QUrl('http://calibre-ebook.com/donate'))
<form action="https://www.paypal.com/cgi-bin/webscr" method="post">
<input type="hidden" name="cmd" value="_s-xclick" />
<input type="hidden" name="hosted_button_id" value="3029467" />
<input type="image" src="https://www.paypal.com/en_US/i/btn/btn_donateCC_LG.gif" border="0" name="submit" alt="Donate to support calibre development" />
<img alt="" border="0" src="https://www.paypal.com/en_US/i/scr/pixel.gif" width="1" height="1" />
</form>
'''
MSG = _('is the result of the efforts of many volunteers from all '
'over the world. If you find it useful, please consider '
'donating to support its development. Your donation helps '
'keep calibre development going.')
HTML = u'''
<html>
<head>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
<title>Donate to support calibre</title>
</head>
<body style="background:white">
<div><a href="http://calibre-ebook.com"><img style="border:0px"
src="file://%s" alt="calibre" /></a></div>
<p>Calibre %s</p>
%s
</body>
</html>
'''%(P('content_server/calibre_banner.png').replace(os.sep, '/'), MSG, BUTTON)
pt = PersistentTemporaryFile('_donate.htm')
pt.write(HTML.encode('utf-8'))
pt.close()
open_local_file(pt.name)
def confirm_quit(self): def confirm_quit(self):
if self.job_manager.has_jobs(): if self.job_manager.has_jobs():

View File

@ -317,7 +317,7 @@ class CoverView(QGraphicsView, ImageDropMixin):
ImageDropMixin.__init__(self) ImageDropMixin.__init__(self)
def get_pixmap(self): def get_pixmap(self):
for item in self.scene().items(): for item in self.scene.items():
if hasattr(item, 'pixmap'): if hasattr(item, 'pixmap'):
return item.pixmap() return item.pixmap()
@ -342,6 +342,7 @@ class FontFamilyModel(QAbstractListModel):
self.families = list(qt_families.intersection(set(self.families))) self.families = list(qt_families.intersection(set(self.families)))
self.families.sort() self.families.sort()
self.families[:0] = [_('None')] self.families[:0] = [_('None')]
self.font = QFont('sansserif')
def rowCount(self, *args): def rowCount(self, *args):
return len(self.families) return len(self.families)
@ -354,10 +355,11 @@ class FontFamilyModel(QAbstractListModel):
return NONE return NONE
if role == Qt.DisplayRole: if role == Qt.DisplayRole:
return QVariant(family) return QVariant(family)
if False and role == Qt.FontRole: if role == Qt.FontRole:
# Causes a Qt crash with some fonts # If a user chooses some non standard font as the interface font,
# so disabled. # rendering some font names causes Qt to crash, so return what is
return QVariant(QFont(family)) # hopefully a "safe" font
return QVariant(self.font)
return NONE return NONE
def index_of(self, family): def index_of(self, family):

View File

@ -549,6 +549,8 @@ Calibre has several keyboard shortcuts to save you time and mouse movement. Thes
- Download metadata and shortcuts - Download metadata and shortcuts
* - :kbd:`Ctrl+R` * - :kbd:`Ctrl+R`
- Restart calibre - Restart calibre
* - :kbd:`Ctrl+Shift+R`
- Restart calibre in debug mode
* - :kbd:`Shift+Ctrl+E` * - :kbd:`Shift+Ctrl+E`
- Add empty books to calibre - Add empty books to calibre
* - :kbd:`Ctrl+Q` * - :kbd:`Ctrl+Q`

View File

@ -1,4 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python2
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
@ -8,114 +8,71 @@ __docformat__ = 'restructuredtext en'
Plugin to make the commit command automatically close bugs when the commit Plugin to make the commit command automatically close bugs when the commit
message contains `Fix #number` or `Implement #number`. Also updates the commit message contains `Fix #number` or `Implement #number`. Also updates the commit
message with the summary of the closed bug. It also set the `--fixes` metadata message with the summary of the closed bug. It also set the `--fixes` metadata
appropriately. Currently only works with a Trac bug repository with the XMLRPC appropriately.
plugin enabled.
To use copy this file into `~/.bazaar/plugins` and add the following to branch.conf
in the working tree you want to use it with::
trac_reponame_url = <url>
trac_reponame_username = <username>
trac_reponame_password = <password>
''' '''
import os, re, xmlrpclib, subprocess import re, urllib, importlib, sys
from bzrlib.builtins import cmd_commit as _cmd_commit, tree_files from bzrlib.builtins import cmd_commit as _cmd_commit
from bzrlib import branch
import bzrlib import bzrlib
from lxml import html
SENDMAIL = ('/home/kovid/work/kde', 'pgp_mail')
class cmd_commit(_cmd_commit): class cmd_commit(_cmd_commit):
@classmethod def expand_bug(self, msg):
def trac_url(self, username, password, url):
return url.replace('//', '//%s:%s@'%(username, password))+'/login/xmlrpc'
def get_trac_summary(self, bug, url):
print 'Getting bug summary for bug #%s'%bug,
server = xmlrpclib.ServerProxy(url)
attributes = server.ticket.get(int(bug))[-1]
print attributes['summary']
return attributes['summary']
def expand_bug(self, msg, nick, config, bug_tracker, type='trac'):
prefix = '%s_%s_'%(type, nick)
username = config.get_user_option(prefix+'username')
password = config.get_user_option(prefix+'password')
close_bug = config.get_user_option(prefix+'pattern')
if close_bug is None:
close_bug = r'(Fix|Implement|Fixes|Fixed|Implemented)\s+#(\d+)' close_bug = r'(Fix|Implement|Fixes|Fixed|Implemented)\s+#(\d+)'
close_bug_pat = re.compile(close_bug, re.IGNORECASE) close_bug_pat = re.compile(close_bug, re.IGNORECASE)
match = close_bug_pat.search(msg) match = close_bug_pat.search(msg)
if not match: if not match:
return msg, None, None, None return msg, None, None
action, bug = match.group(1), match.group(2) action, bug = match.group(1), match.group(2)
summary = '' summary = ''
if type == 'trac': raw = urllib.urlopen('https://bugs.launchpad.net/calibre/+bug/' +
url = self.trac_url(username, password, bug_tracker) bug).read()
summary = self.get_trac_summary(bug, url) h1 = html.fromstring(raw).xpath('//h1[@id="edit-title"]')[0]
summary = html.tostring(h1, method='text', encoding=unicode).strip()
print 'Working on bug:', summary
if summary: if summary:
msg = msg.replace('#%s'%bug, '#%s (%s)'%(bug, summary)) msg = msg.replace('#%s'%bug, '#%s (%s)'%(bug, summary))
msg = msg.replace('Fixesed', 'Fixed') msg = msg.replace('Fixesed', 'Fixed')
return msg, bug, url, action return msg, bug, action
def get_bugtracker(self, basedir, type='trac'):
config = os.path.join(basedir, '.bzr', 'branch', 'branch.conf')
bugtracker, nick = None, None
if os.access(config, os.R_OK):
for line in open(config).readlines():
match = re.search(r'%s_(\S+)_url\s*=\s*(\S+)'%type, line)
if match:
nick, bugtracker = match.group(1), match.group(2)
break
return nick, bugtracker
def expand_message(self, msg, tree):
nick, bugtracker = self.get_bugtracker(tree.basedir, type='trac')
if not bugtracker:
return msg
config = branch.Branch.open(tree.basedir).get_config()
msg, bug, url, action = self.expand_bug(msg, nick, config, bugtracker)
return msg, bug, url, action, nick, config
def run(self, message=None, file=None, verbose=False, selected_list=None, def run(self, message=None, file=None, verbose=False, selected_list=None,
unchanged=False, strict=False, local=False, fixes=None, unchanged=False, strict=False, local=False, fixes=None,
author=None, show_diff=False, exclude=None): author=None, show_diff=False, exclude=None):
nick = config = bug = action = None bug = action = None
if message: if message:
try: message, bug, action = self.expand_bug(message)
message, bug, url, action, nick, config = \
self.expand_message(message, tree_files(selected_list)[0])
except ValueError:
pass
if nick and bug and not fixes: if bug and not fixes:
fixes = [nick+':'+bug] fixes = ['lp:'+bug]
ret = _cmd_commit.run(self, message=message, file=file, verbose=verbose, ret = _cmd_commit.run(self, message=message, file=file, verbose=verbose,
selected_list=selected_list, unchanged=unchanged, selected_list=selected_list, unchanged=unchanged,
strict=strict, local=local, fixes=fixes, strict=strict, local=local, fixes=fixes,
author=author, show_diff=show_diff, exclude=exclude) author=author, show_diff=show_diff, exclude=exclude)
if message and bug and action and nick and config: if message and bug and action:
self.close_bug(bug, action, url, config) self.close_bug(bug, action)
return ret return ret
def close_bug(self, bug, action, url, config): def close_bug(self, bug, action):
print 'Closing bug #%s'% bug print 'Closing bug #%s'% bug
#nick = config.get_nickname() #nick = config.get_nickname()
suffix = config.get_user_option('bug_close_comment') suffix = ('The fix will be in the next release. '
if suffix is None: 'calibre is usually released every Friday.')
suffix = 'The fix will be in the next release.'
action = action+'ed' action = action+'ed'
msg = '%s in branch %s. %s'%(action, 'lp:calibre', suffix) msg = '%s in branch %s. %s'%(action, 'lp:calibre', suffix)
msg = msg.replace('Fixesed', 'Fixed') msg = msg.replace('Fixesed', 'Fixed')
server = xmlrpclib.ServerProxy(url) msg += '\n\n status fixreleased'
server.ticket.update(int(bug), msg,
{'status':'closed', 'resolution':'fixed'}, sys.path.insert(0, SENDMAIL[0])
True)
subprocess.Popen('/home/kovid/work/kde/mail.py -f --delay 10'.split()) sendmail = importlib.import_module(SENDMAIL[1])
to = bug+'@bugs.launchpad.net'
sendmail.sendmail(msg, to, 'Fixed in lp:calibre')
bzrlib.commands.register_command(cmd_commit) bzrlib.commands.register_command(cmd_commit)

View File

@ -8,15 +8,18 @@ import re, htmlentitydefs
_ascii_pat = None _ascii_pat = None
def clean_ascii_chars(txt, charlist=None): def clean_ascii_chars(txt, charlist=None):
''' r'''
Remove ASCII control chars: 0 to 8 and 11, 12, 14-31 by default Remove ASCII control chars.
This is all control chars except \\t,\\n and \\r This is all control chars except \t, \n and \r
''' '''
if not txt: if not txt:
return '' return ''
global _ascii_pat global _ascii_pat
if _ascii_pat is None: if _ascii_pat is None:
chars = list(range(8)) + [0x0B, 0x0C] + list(range(0x0E, 0x1F)) chars = set(xrange(32))
chars.add(127)
for x in (9, 10, 13):
chars.remove(x)
_ascii_pat = re.compile(u'|'.join(map(unichr, chars))) _ascii_pat = re.compile(u'|'.join(map(unichr, chars)))
if charlist is None: if charlist is None:

View File

@ -584,6 +584,12 @@ def educateQuotes(str):
# <p>He said, "'Quoted' words in a larger quote."</p> # <p>He said, "'Quoted' words in a larger quote."</p>
str = re.sub(r""""'(?=\w)""", """&#8220;&#8216;""", str) str = re.sub(r""""'(?=\w)""", """&#8220;&#8216;""", str)
str = re.sub(r"""'"(?=\w)""", """&#8216;&#8220;""", str) str = re.sub(r"""'"(?=\w)""", """&#8216;&#8220;""", str)
str = re.sub(r'''""(?=\w)''', """&#8220;&#8220;""", str)
str = re.sub(r"""''(?=\w)""", """&#8216;&#8216;""", str)
str = re.sub(r'''\"\'''', """&#8221;&#8217;""", str)
str = re.sub(r'''\'\"''', """&#8217;&#8221;""", str)
str = re.sub(r'''""''', """&#8221;&#8221;""", str)
str = re.sub(r"""''""", """&#8217;&#8217;""", str)
# Special case for decade abbreviations (the '80s): # Special case for decade abbreviations (the '80s):
str = re.sub(r"""\b'(?=\d{2}s)""", r"""&#8217;""", str) str = re.sub(r"""\b'(?=\d{2}s)""", r"""&#8217;""", str)

View File

@ -251,12 +251,12 @@ class WMF(object):
img.load(bmp) img.load(bmp)
return img.export('png') return img.export('png')
def wmf_unwrap(wmf_data): def wmf_unwrap(wmf_data, verbose=0):
''' '''
Return the largest embedded raster image in the WMF. Return the largest embedded raster image in the WMF.
The returned data is in PNG format. The returned data is in PNG format.
''' '''
w = WMF() w = WMF(verbose=verbose)
w(wmf_data) w(wmf_data)
if not w.has_raster_image: if not w.has_raster_image:
raise ValueError('No raster image found in the WMF') raise ValueError('No raster image found in the WMF')
@ -266,4 +266,5 @@ if __name__ == '__main__':
wmf = WMF(verbose=4) wmf = WMF(verbose=4)
wmf(open(sys.argv[-1], 'rb')) wmf(open(sys.argv[-1], 'rb'))
open('/t/test.bmp', 'wb').write(wmf.bitmaps[0]) open('/t/test.bmp', 'wb').write(wmf.bitmaps[0])
open('/t/test.png', 'wb').write(wmf.to_png())

View File

@ -28,6 +28,7 @@ class Article(object):
pass pass
if not isinstance(self._title, unicode): if not isinstance(self._title, unicode):
self._title = self._title.decode('utf-8', 'replace') self._title = self._title.decode('utf-8', 'replace')
self._title = clean_ascii_chars(self._title)
self.url = url self.url = url
self.author = author self.author = author
if author and not isinstance(author, unicode): if author and not isinstance(author, unicode):
@ -75,7 +76,7 @@ class Article(object):
t = t.decode('utf-8', 'replace') t = t.decode('utf-8', 'replace')
return t return t
def fset(self, val): def fset(self, val):
self._title = val self._title = clean_ascii_chars(val)
return property(fget=fget, fset=fset) return property(fget=fget, fset=fset)