mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Merge from trunk
This commit is contained in:
commit
c5b2de9663
79
recipes/caijing.recipe
Normal file
79
recipes/caijing.recipe
Normal file
@ -0,0 +1,79 @@
|
||||
import re
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
|
||||
class Caijing(BasicNewsRecipe):
    '''
    Recipe for the current issue of Caijing Magazine.

    The issue index page links to the current issue; that issue page is
    scanned for a cover image and for article links grouped by section.
    '''

    title = 'Caijing Magazine'
    __author__ = 'Eric Chen'

    description = '''Bi-weekly Finance and Economics Review. Founded in 1998, the fortnightly CAIJING
Magazine has firmly established itself as a news authority and leading voice for
business and financial issues in China.
CAIJING Magazine closely tracks the most important aspects of China's economic reforms,
developments and policy changes, as well as major events in the capital markets. It also
offers a broad international perspective through first-hand reporting on international
political and economic issues.
CAIJING Magazine is China's most widely read business and finance magazine, with a
circulation of 225,000 per issue. It boasts top-level readers from government, business
and academic circles. '''
    language = 'zh'
    category = 'news, China'
    encoding = 'UTF-8'
    timefmt = ' [%a, %d %b, %Y]'
    needs_subscription = True

    remove_tags = [dict(attrs={'class':['topad', 'nav', 'searchbox', 'connav',
        'mbx', 'bianji', 'bianji bj', 'lnewlist', 'rdtj', 'loadComment',
        'conr', 'bottom', 'bottomcopyr', 'emaildy', 'rcom', 'allcontent']}),
        dict(name=['script', 'noscript', 'style'])]
    no_stylesheets = True
    remove_javascript = True
    # Both are filled in by parse_index() once the current issue is located.
    current_issue_url = ""
    current_issue_cover = ""

    # Raw-string patterns, compiled once: a YYYY-MM-DD date and a 9 digit
    # article id, both expected inside article URLs.
    _DATE_PAT = re.compile(r'\d{4}-\d{2}-\d{2}')
    _ARTICLE_ID_PAT = re.compile(r'\d{9}')

    def get_browser(self):
        '''
        Return a browser, logged in through the site's ``mainLoginForm`` when
        both a username and a password have been supplied.
        '''
        # NOTE(review): called without an instance, exactly as in the original;
        # this presumes BasicNewsRecipe.get_browser can be called on the
        # class -- confirm against the calibre version in use.
        br = BasicNewsRecipe.get_browser()
        if self.username is not None and self.password is not None:
            br.open('http://service.caijing.com.cn/usermanage/login')
            br.select_form(name='mainLoginForm')
            br['username'] = self.username
            br['password'] = self.password
            br.submit()
        return br

    def parse_index(self):
        '''
        Build the feed list for the current issue.

        :return: A list of ``(section_title, articles)`` tuples, where every
                 article is a dict with ``title``, ``url`` and ``date`` keys.
                 Sections without any matching article link are left out.
        :raises ValueError: If the index page has no link to the current issue.
        '''
        index = self.index_to_soup('http://magazine.caijing.com.cn/2011/cjindex2011/')
        box = index.find('div', attrs={'class':'fmcon'})
        link = box.find('a', href=True) if box is not None else None
        if link is None:
            # Fail with a readable message instead of an AttributeError on None
            raise ValueError('Could not find the link to the current issue of Caijing')
        self.current_issue_url = link['href']

        soup = self.index_to_soup(self.current_issue_url)

        # The cover is taken to be an image whose URL contains a date. When
        # several images match, the last one wins (as before).
        for img in soup.findAll('img', {'src' : re.compile('.')}):
            if self._DATE_PAT.search(img['src']):
                self.current_issue_cover = img['src']

        feeds = []
        for section in soup.findAll('div', attrs={'class':'cebd'}):
            section_title = self.tag_to_string(section.find('div', attrs={'class':'ceti'}))
            articles = []
            for post in section.findAll('a', href=True):
                href = post['href']
                date_match = self._DATE_PAT.search(href)
                id_match = self._ARTICLE_ID_PAT.search(href)
                if date_match is None or id_match is None:
                    # Not an article link (or one without a usable id): skip
                    # it rather than aborting the whole download.
                    continue
                date = date_match.group(0)
                # Everything from the first digit onwards is replaced by the
                # path of the full-content page, then the query is appended.
                url = re.sub(r'\d.*', 'templates/inc/chargecontent2.jsp?id=', href)
                url = url + id_match.group(0) + '&time=' + date + '&cl=106&page=all'

                title = self.tag_to_string(post)
                articles.append({'title':title, 'url':url, 'date':date})

            if articles:
                feeds.append((section_title, articles))
        return feeds

    def get_cover_url(self):
        ''' Return the cover image URL found by parse_index() ('' if none). '''
        return self.current_issue_cover
|
||||
|
@ -8,13 +8,13 @@ __description__ = 'Providing context and clarity on national and international n
|
||||
|
||||
'''csmonitor.com'''
|
||||
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class ChristianScienceMonitor(BasicNewsRecipe):
|
||||
|
||||
author = 'Kovid Goyal, Sujata Raman and Lorenzo Vigentini'
|
||||
__author__ = 'Kovid Goyal'
|
||||
description = 'Providing context and clarity on national and international news, peoples and cultures'
|
||||
|
||||
cover_url = 'http://www.csmonitor.com/extension/csm_base/design/csm_design/images/csmlogo_179x46.gif'
|
||||
@ -34,6 +34,49 @@ class ChristianScienceMonitor(BasicNewsRecipe):
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
|
||||
def append_page(self, soup, appendtag, position):
    '''
    Follow the "Next" link in the navigation div of ``soup`` (if any) and
    insert the article text of the following page(s) into ``appendtag``.

    :param soup: Parsed page whose ``div.navigation`` is inspected.
    :param appendtag: Tag that receives the text of the next page.
    :param position: Index in ``appendtag`` at which the text is inserted.
    '''
    nav = soup.find('div',attrs={'class':'navigation'})
    if not nav:
        return
    for part in nav.findAll('a'):
        if 'Next' not in part:
            continue
        hrefs = re.findall(r'href="(.*?)"', str(part))
        if not hrefs:
            # A "Next" link without a double-quoted href cannot be followed
            continue
        soup2 = self.index_to_soup('http://www.csmonitor.com' + hrefs[0])
        texttag = soup2.find('div',
                attrs={'class': re.compile('list-article-.*')})
        if texttag is None:
            # No article text on the next page, nothing to append
            continue
        for tc in soup2.findAll(attrs={'class': 'list-description'}):
            tc.extract()
        trash_h = soup2.h1
        if trash_h is not None:
            # Drop the repeated headline; not every page has one
            trash_h.extract()

        # Pages after this one are appended to the end of this page's text
        # before it is moved into appendtag.
        newpos = len(texttag.contents)
        self.append_page(soup2, texttag, newpos)
        texttag.extract()
        appendtag.insert(position, texttag)
|
||||
|
||||
def preprocess_html(self, soup):
    '''
    Return the soup to convert for an article.

    If the page links to a print version, that version is downloaded and
    returned. Otherwise the following pages are appended to ``soup`` via
    append_page(), navigation blocks, elements carrying a ``style``
    attribute and ``sByline`` elements are removed, and ``soup`` itself is
    returned.
    '''
    PRINT_RE = re.compile(r'/layout/set/print/content/view/print/[0-9]*')
    # findall() on a string either matches or returns []; the previous
    # try/except around it could only have left print_found undefined.
    print_found = PRINT_RE.findall(str(soup))
    if print_found:
        print_url = 'http://www.csmonitor.com' + print_found[0]
        return self.index_to_soup(print_url)

    # NOTE(review): the source lost its indentation; the clean-up below is
    # assumed to belong to the "no print version" branch -- confirm.
    self.append_page(soup, soup.body, 3)

    # Collect everything first, then remove, as the original did
    trash = []
    trash.extend(soup.findAll(attrs={'class': re.compile('navigation.*')}))
    trash.extend(soup.findAll(attrs={'style': re.compile('.*')}))
    trash.extend(soup.findAll(attrs={'class': 'sByline'}))
    for tag in trash:
        tag.extract()
    return soup
|
||||
|
||||
preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
|
||||
[
|
||||
@ -43,7 +86,6 @@ class ChristianScienceMonitor(BasicNewsRecipe):
|
||||
(r'Full HTML version of this story which may include photos, graphics, and related links.*</body>',
|
||||
lambda match : '</body>'),
|
||||
]]
|
||||
|
||||
extra_css = '''
|
||||
h1{ color:#000000;font-family: Georgia,Times,"Times New Roman",serif; font-size: large}
|
||||
.sub{ color:#000000;font-family: Georgia,Times,"Times New Roman",serif; font-size: small;}
|
||||
@ -56,10 +98,9 @@ class ChristianScienceMonitor(BasicNewsRecipe):
|
||||
#main{font-family:Arial,Tahoma,Verdana,Helvetica,sans-serif ; font-size: small; }
|
||||
#photo-details{ font-family:Arial,Helvetica,sans-serif ; color:#999999; font-size: x-small;}
|
||||
span.name{color:#205B87;font-family: Georgia,Times,"Times New Roman",serif; font-size: x-small}
|
||||
p#dateline{color:#444444 ; font-family:Arial,Helvetica,sans-serif ; font-style:italic;}
|
||||
'''
|
||||
feeds = [
|
||||
(u'Top Stories' , u'http://rss.csmonitor.com/feeds/top'),
|
||||
p#dateline{color:#444444 ; font-family:Arial,Helvetica,sans-serif ; font-style:italic;} '''
|
||||
|
||||
feeds = [(u'Top Stories', u'http://rss.csmonitor.com/feeds/top'),
|
||||
(u'World' , u'http://rss.csmonitor.com/feeds/world'),
|
||||
(u'USA' , u'http://rss.csmonitor.com/feeds/usa'),
|
||||
(u'Commentary' , u'http://rss.csmonitor.com/feeds/commentary'),
|
||||
@ -74,9 +115,7 @@ class ChristianScienceMonitor(BasicNewsRecipe):
|
||||
(u'Home Forum' , u'http://rss.csmonitor.com/feeds/homeforum')
|
||||
]
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'id':'mainColumn'}),
|
||||
]
|
||||
keep_only_tags = [dict(name='div', attrs={'id':'mainColumn'}), ]
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'id':['story-tools','videoPlayer','storyRelatedBottom','enlarge-photo','photo-paginate']}),
|
||||
@ -86,7 +125,10 @@ class ChristianScienceMonitor(BasicNewsRecipe):
|
||||
'hide', 'podBrdr']}),
|
||||
dict(name='ul', attrs={'class':[ 'centerliststories']}) ,
|
||||
dict(name='form', attrs={'id':[ 'commentform']}) ,
|
||||
dict(name='div', attrs={'class': ['ui-comments']})
|
||||
]
|
||||
|
||||
remove_tags_after = [ dict(name='div', attrs={'class':[ 'ad csmAd']})]
|
||||
|
||||
remove_tags_after = [ dict(name='div', attrs={'class':[ 'ad csmAd']}),
|
||||
dict(name='div', attrs={'class': [re.compile('navigation.*')]}),
|
||||
dict(name='div', attrs={'style': [re.compile('.*')]})
|
||||
]
|
||||
|
@ -1,4 +1,3 @@
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
@ -6,55 +5,48 @@ class WashingtonPost(BasicNewsRecipe):
|
||||
|
||||
title = 'Washington Post'
|
||||
description = 'US political news'
|
||||
__author__ = 'Kovid Goyal and Sujata Raman'
|
||||
__author__ = 'Kovid Goyal'
|
||||
use_embedded_content = False
|
||||
max_articles_per_feed = 20
|
||||
language = 'en'
|
||||
encoding = 'utf-8'
|
||||
|
||||
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
|
||||
extra_css = '''
|
||||
#articleCopyright { font-family:Arial,helvetica,sans-serif ; font-weight:bold ; font-size:x-small ;}
|
||||
p { font-family:"Times New Roman",times,serif ; font-weight:normal ; font-size:small ;}
|
||||
body{font-family:arial,helvetica,sans-serif}
|
||||
'''
|
||||
|
||||
feeds = [ ('Today\'s Highlights', 'http://www.washingtonpost.com/wp-dyn/rss/linkset/2005/03/24/LI2005032400102.xml'),
|
||||
('Politics', 'http://www.washingtonpost.com/wp-dyn/rss/politics/index.xml'),
|
||||
('Nation', 'http://www.washingtonpost.com/wp-dyn/rss/nation/index.xml'),
|
||||
('World', 'http://www.washingtonpost.com/wp-dyn/rss/world/index.xml'),
|
||||
('Business', 'http://www.washingtonpost.com/wp-dyn/rss/business/index.xml'),
|
||||
('Technology', 'http://www.washingtonpost.com/wp-dyn/rss/technology/index.xml'),
|
||||
('Health', 'http://www.washingtonpost.com/wp-dyn/rss/health/index.xml'),
|
||||
('Education', 'http://www.washingtonpost.com/wp-dyn/rss/education/index.xml'),
|
||||
('Style',
|
||||
'http://www.washingtonpost.com/wp-dyn/rss/print/style/index.xml'),
|
||||
('NFL Sports',
|
||||
'http://www.washingtonpost.com/wp-dyn/rss/sports/index/nfl/index.xml'),
|
||||
('Redskins', 'http://www.washingtonpost.com/wp-dyn/rss/sports/redskins/index.xml'),
|
||||
('Editorials', 'http://www.washingtonpost.com/wp-dyn/rss/linkset/2005/05/30/LI2005053000331.xml'),
|
||||
feeds = [
|
||||
('Politics', 'http://www.washingtonpost.com/rss/politics'),
|
||||
('Nation', 'http://www.washingtonpost.com/rss/national'),
|
||||
('World', 'http://www.washingtonpost.com/rss/world'),
|
||||
('Business', 'http://www.washingtonpost.com/rss/business'),
|
||||
('Lifestyle', 'http://www.washingtonpost.com/rss/lifestyle'),
|
||||
('Sports', 'http://www.washingtonpost.com/rss/sports'),
|
||||
('Redskins', 'http://www.washingtonpost.com/rss/sports/redskins'),
|
||||
('Opinions', 'http://www.washingtonpost.com/rss/opinions'),
|
||||
('Entertainment', 'http://www.washingtonpost.com/rss/entertainment'),
|
||||
('Local', 'http://www.washingtonpost.com/rss/local'),
|
||||
('Investigations',
|
||||
'http://www.washingtonpost.com/rss/investigations'),
|
||||
]
|
||||
|
||||
remove_tags = [{'id':['pfmnav', 'ArticleCommentsWrapper']}]
|
||||
remove_tags = [
|
||||
{'class':lambda x: x and 'article-toolbar' in x},
|
||||
{'class':lambda x: x and 'quick-comments' in x},
|
||||
{'class':lambda x: x and 'tweet' in x},
|
||||
{'class':lambda x: x and 'article-related' in x},
|
||||
{'class':lambda x: x and 'hidden' in x.split()},
|
||||
{'class':lambda x: x and 'also-read' in x.split()},
|
||||
{'class':lambda x: x and 'partners-content' in x.split()},
|
||||
{'class':['module share', 'module ads', 'comment-vars', 'hidden',
|
||||
'share-icons-wrap', 'comments']},
|
||||
{'id':['right-rail']},
|
||||
|
||||
]
|
||||
keep_only_tags = dict(id=['content', 'article'])
|
||||
|
||||
def get_article_url(self, article):
|
||||
return article.get('guid', article.get('link', None))
|
||||
|
||||
def print_version(self, url):
|
||||
return url.rpartition('.')[0] + '_pf.html'
|
||||
url = url.rpartition('?')[0]
|
||||
return url.replace('_story.html', '_singlePage.html')
|
||||
|
||||
def postprocess_html(self, soup, first):
|
||||
for div in soup.findAll(name='div', style=re.compile('margin')):
|
||||
div['style'] = ''
|
||||
return soup
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for tag in soup.findAll('font'):
|
||||
if tag.has_key('size'):
|
||||
if tag['size'] == '+2':
|
||||
if tag.b:
|
||||
return soup
|
||||
return None
|
||||
|
@ -18,6 +18,6 @@ def recipe_title_callback(raw):
|
||||
return eval(raw.decode('utf-8'))
|
||||
|
||||
vipy.session.add_content_browser('.r', ',r', 'Recipe',
|
||||
vipy.session.glob_based_iterator(os.path.join(project_dir, 'resources', 'recipes', '*.recipe')),
|
||||
vipy.session.glob_based_iterator(os.path.join(project_dir, 'recipes', '*.recipe')),
|
||||
vipy.session.regexp_based_matcher(r'title\s*=\s*(?P<title>.+)', 'title', recipe_title_callback))
|
||||
EOFPY
|
||||
|
@ -14,9 +14,9 @@ from setup.build_environment import HOST, PROJECT
|
||||
BASE_RSYNC = ['rsync', '-avz', '--delete']
|
||||
EXCLUDES = []
|
||||
for x in [
|
||||
'src/calibre/plugins', 'src/calibre/manual', 'src/calibre/trac',
|
||||
'src/calibre/plugins', 'src/calibre/manual', 'src/calibre/trac', 'recipes',
|
||||
'.bzr', '.build', '.svn', 'build', 'dist', 'imgsrc', '*.pyc', '*.pyo', '*.swp',
|
||||
'*.swo']:
|
||||
'*.swo', 'format_docs']:
|
||||
EXCLUDES.extend(['--exclude', x])
|
||||
SAFE_EXCLUDES = ['"%s"'%x if '*' in x else x for x in EXCLUDES]
|
||||
|
||||
@ -138,7 +138,7 @@ class VMInstaller(Command):
|
||||
self.vm = self.VM
|
||||
if not self.vmware_started():
|
||||
self.start_vmware()
|
||||
subprocess.call(['chmod', '-R', '+r', 'resources/recipes'])
|
||||
subprocess.call(['chmod', '-R', '+r', 'recipes'])
|
||||
self.start_vm()
|
||||
self.download_installer()
|
||||
if not self.dont_shutdown:
|
||||
|
@ -6,7 +6,7 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import sys, os, shutil, glob, py_compile, subprocess, re
|
||||
import sys, os, shutil, glob, py_compile, subprocess, re, zipfile, time
|
||||
|
||||
from setup import Command, modules, functions, basenames, __version__, \
|
||||
__appname__
|
||||
@ -40,6 +40,13 @@ DESCRIPTIONS = {
|
||||
'calibre-smtp' : 'Command line interface for sending books via email',
|
||||
}
|
||||
|
||||
def walk(dir):
    ''' Yield the path of every file found by os.walk() below ``dir`` '''
    for dirpath, _subdirs, filenames in os.walk(dir):
        for fname in filenames:
            yield os.path.join(dirpath, fname)
|
||||
|
||||
|
||||
class Win32Freeze(Command, WixMixIn):
|
||||
|
||||
description = 'Free windows calibre installation'
|
||||
@ -63,12 +70,15 @@ class Win32Freeze(Command, WixMixIn):
|
||||
self.rc_template = self.j(self.d(self.a(__file__)), 'template.rc')
|
||||
self.py_ver = ''.join(map(str, sys.version_info[:2]))
|
||||
self.lib_dir = self.j(self.base, 'Lib')
|
||||
self.pydlib = self.j(self.base, 'pydlib')
|
||||
self.pylib = self.j(self.base, 'pylib.zip')
|
||||
|
||||
self.initbase()
|
||||
self.build_launchers()
|
||||
self.freeze()
|
||||
self.embed_manifests()
|
||||
self.install_site_py()
|
||||
self.archive_lib_dir()
|
||||
self.create_installer()
|
||||
|
||||
def initbase(self):
|
||||
@ -356,4 +366,108 @@ class Win32Freeze(Command, WixMixIn):
|
||||
dest, lib]
|
||||
self.run_builder(cmd)
|
||||
|
||||
def archive_lib_dir(self):
    '''
    Replace the Lib directory by pylib.zip plus the pydlib directory.

    Everything that add_to_zipfile() accepts goes into ``self.pylib``;
    packages that is_zip_safe() rejects, ``.pyd`` files and the directories
    named by PIL.pth / pywin32.pth are copied into ``self.pydlib`` instead.
    ``self.lib_dir`` is deleted at the end.
    '''
    self.info('Putting all python code into a zip file for performance')
    if os.path.exists(self.pydlib):
        shutil.rmtree(self.pydlib)
    os.makedirs(self.pydlib)
    # State shared with add_to_zipfile()
    self.zf_timestamp = time.localtime(time.time())[:6]
    self.zf_names = set()
    sp = self.j(self.lib_dir, 'site-packages')
    with zipfile.ZipFile(self.pylib, 'w', zipfile.ZIP_STORED) as zf:
        # Top level of Lib; site-packages is handled separately below
        for entry in os.listdir(self.lib_dir):
            if entry != 'site-packages':
                self.add_to_zipfile(zf, entry, self.lib_dir)

        # Entries of site-packages that the generic loop at the end must skip
        handled = set(['site.pyo'])

        # These .pth files are copied as-is together with their directories
        for pth in ('PIL.pth', 'pywin32.pth'):
            handled.add(pth)
            pth_path = self.j(sp, pth)
            shutil.copyfile(pth_path, self.j(self.pydlib, pth))
            for d in self.get_pth_dirs(pth_path):
                shutil.copytree(d, self.j(self.pydlib, self.b(d)), True)
                handled.add(self.b(d))

        # The contents of every directory named in easy-install.pth are
        # merged into the zip file, or into pydlib when not zip safe
        handled.add('easy-install.pth')
        for d in self.get_pth_dirs(self.j(sp, 'easy-install.pth')):
            handled.add(self.b(d))
            zip_safe = self.is_zip_safe(d)
            for entry in os.listdir(d):
                if entry == 'EGG-INFO':
                    continue
                if zip_safe:
                    self.add_to_zipfile(zf, entry, d)
                    continue
                src = self.j(d, entry)
                dest = self.j(self.pydlib, entry)
                if os.path.isdir(src):
                    shutil.copytree(src, dest, True)
                else:
                    shutil.copy2(src, dest)

        # Whatever is left in site-packages
        for entry in os.listdir(sp):
            if entry in handled or entry.endswith('.egg-info'):
                continue
            src = self.j(sp, entry)
            if os.path.isdir(src):
                if not os.listdir(src):
                    continue
                if self.is_zip_safe(src):
                    self.add_to_zipfile(zf, entry, sp)
                else:
                    shutil.copytree(src, self.j(self.pydlib, entry), True)
            elif entry.endswith('.pyd'):
                shutil.copy2(src, self.j(self.pydlib, entry))
            else:
                self.add_to_zipfile(zf, entry, sp)

    shutil.rmtree(self.lib_dir)
|
||||
|
||||
def is_zip_safe(self, path):
    '''
    Return True unless some file below ``path`` has a .pyd, .dll or .exe
    extension (compared case-insensitively).
    '''
    binary_exts = ('.pyd', '.dll', '.exe')
    return not any(os.path.splitext(f)[1].lower() in binary_exts
            for f in walk(path))
|
||||
|
||||
def get_pth_dirs(self, pth):
    '''
    Yield the existing paths listed in the .pth file ``pth``.

    Every line is resolved relative to the directory containing ``pth``.
    Blank lines, comments, ``import`` lines and the literal entry
    ``win32\\lib`` are skipped, as are entries that do not exist on disk.
    '''
    base = os.path.dirname(pth)
    # Read everything up front so the file is closed even if the caller
    # never exhausts this generator.
    with open(pth) as f:
        lines = f.readlines()
    for line in lines:
        line = line.strip()
        if not line or line.startswith(('#', 'import')):
            continue
        if line == 'win32\\lib':
            continue
        candidate = self.j(base, line)
        if os.path.exists(candidate):
            yield candidate
|
||||
|
||||
def add_to_zipfile(self, zf, name, base, exclude=frozenset()):
    '''
    Add ``name`` (a path relative to ``base``) to the open zip file ``zf``.

    Directories are added recursively, empty directories are ignored. Only
    .py, .pyc and .pyo files have their contents stored; other files are
    merely recorded in ``self.zf_names``.

    :param exclude: Names of direct children of ``name`` to leave out. It is
                    not passed on to deeper levels.
    :raises ValueError: If ``name`` was already added, or if it is a .pyd,
                        .dll or .exe file.
    '''
    abspath = self.j(base, name)
    name = name.replace(os.sep, '/')
    if name in self.zf_names:
        raise ValueError('Already added %r to zipfile [%r]'%(name, abspath))
    zinfo = zipfile.ZipInfo(filename=name, date_time=self.zf_timestamp)

    if os.path.isdir(abspath):
        if not os.listdir(abspath):
            return
        # 0o... octal literals work on Python 2.6+ and Python 3, the old
        # 0700 spelling is a syntax error on Python 3.
        zinfo.external_attr = 0o700 << 16
        zf.writestr(zinfo, '')
        for x in os.listdir(abspath):
            if x not in exclude:
                self.add_to_zipfile(zf, name + os.sep + x, base)
    else:
        ext = os.path.splitext(name)[1].lower()
        if ext in ('.pyd', '.dll', '.exe'):
            raise ValueError('Cannot add %r to zipfile'%abspath)
        zinfo.external_attr = 0o600 << 16
        if ext in ('.py', '.pyc', '.pyo'):
            with open(abspath, 'rb') as f:
                zf.writestr(zinfo, f.read())

    self.zf_names.add(name)
|
||||
|
||||
|
||||
|
||||
|
@ -96,7 +96,7 @@ def main():
|
||||
|
||||
abs__file__()
|
||||
|
||||
addsitedir(os.path.join(sys.app_dir, 'Lib', 'site-packages'))
|
||||
addsitedir(os.path.join(sys.app_dir, 'pydlib'))
|
||||
|
||||
add_calibre_vars()
|
||||
|
||||
|
@ -198,7 +198,7 @@ void initialize_interpreter(wchar_t *outr, wchar_t *errr,
|
||||
buf[strlen(buf)-1] = '\0';
|
||||
|
||||
_snprintf_s(python_home, MAX_PATH, _TRUNCATE, "%s", buf);
|
||||
_snprintf_s(path, 3*MAX_PATH, _TRUNCATE, "%s\\DLLs;%s\\Lib;%s\\Lib\\site-packages",
|
||||
_snprintf_s(path, 3*MAX_PATH, _TRUNCATE, "%s\\pylib.zip;%s\\pydlib;%s\\DLLs",
|
||||
buf, buf, buf);
|
||||
free(buf);
|
||||
|
||||
|
@ -154,9 +154,9 @@
|
||||
<CustomAction Id="LaunchApplication" BinaryKey="WixCA"
|
||||
DllEntry="WixShellExec" Impersonate="yes"/>
|
||||
|
||||
<InstallUISequence>
|
||||
<!--<InstallUISequence>
|
||||
<FileCost Suppress="yes" />
|
||||
</InstallUISequence>
|
||||
</InstallUISequence>-->
|
||||
|
||||
</Product>
|
||||
</Wix>
|
||||
|
@ -58,12 +58,14 @@ class ANDROID(USBMS):
|
||||
0x413c : { 0xb007 : [0x0100, 0x0224]},
|
||||
|
||||
# LG
|
||||
0x1004 : { 0x61cc : [0x100], 0x61ce : [0x100] },
|
||||
0x1004 : { 0x61cc : [0x100], 0x61ce : [0x100], 0x618e : [0x226] },
|
||||
|
||||
# Archos
|
||||
0x0e79 : {
|
||||
0x1400 : [0x0222, 0x0216],
|
||||
0x1408 : [0x0222, 0x0216],
|
||||
0x1411 : [0x216],
|
||||
0x1417 : [0x0216],
|
||||
0x1419 : [0x0216],
|
||||
0x1420 : [0x0216],
|
||||
0x1422 : [0x0216]
|
||||
@ -91,14 +93,14 @@ class ANDROID(USBMS):
|
||||
|
||||
VENDOR_NAME = ['HTC', 'MOTOROLA', 'GOOGLE_', 'ANDROID', 'ACER',
|
||||
'GT-I5700', 'SAMSUNG', 'DELL', 'LINUX', 'GOOGLE', 'ARCHOS',
|
||||
'TELECHIP', 'HUAWEI', 'T-MOBILE', 'SEMC']
|
||||
'TELECHIP', 'HUAWEI', 'T-MOBILE', 'SEMC', 'LGE']
|
||||
WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE',
|
||||
'__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897',
|
||||
'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID',
|
||||
'SCH-I500_CARD', 'SPH-D700_CARD', 'MB810', 'GT-P1000', 'DESIRE',
|
||||
'SGH-T849', '_MB300', 'A70S', 'S_ANDROID', 'A101IT', 'A70H',
|
||||
'IDEOS_TABLET', 'MYTOUCH_4G', 'UMS_COMPOSITE', 'SCH-I800_CARD',
|
||||
'7', 'A956', 'A955']
|
||||
'7', 'A956', 'A955', 'A43', 'ANDROID_PLATFORM']
|
||||
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
|
||||
'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
|
||||
'A70S', 'A101IT', '7']
|
||||
|
@ -19,7 +19,7 @@ class BLACKBERRY(USBMS):
|
||||
|
||||
VENDOR_ID = [0x0fca]
|
||||
PRODUCT_ID = [0x8004, 0x0004]
|
||||
BCD = [0x0200, 0x0107, 0x0210, 0x0201, 0x0211]
|
||||
BCD = [0x0200, 0x0107, 0x0210, 0x0201, 0x0211, 0x0220]
|
||||
|
||||
VENDOR_NAME = 'RIM'
|
||||
WINDOWS_MAIN_MEM = 'BLACKBERRY_SD'
|
||||
|
@ -49,6 +49,8 @@ HEURISTIC_OPTIONS = ['markup_chapter_headings',
|
||||
'dehyphenate', 'renumber_headings',
|
||||
'replace_scene_breaks']
|
||||
|
||||
DEFAULT_TRUE_OPTIONS = HEURISTIC_OPTIONS + ['remove_fake_margins']
|
||||
|
||||
def print_help(parser, log):
|
||||
help = parser.format_help().encode(preferred_encoding, 'replace')
|
||||
log(help)
|
||||
@ -90,7 +92,7 @@ def option_recommendation_to_cli_option(add_option, rec):
|
||||
if opt.long_switch == 'verbose':
|
||||
attrs['action'] = 'count'
|
||||
attrs.pop('type', '')
|
||||
if opt.name in HEURISTIC_OPTIONS and rec.recommended_value is True:
|
||||
if opt.name in DEFAULT_TRUE_OPTIONS and rec.recommended_value is True:
|
||||
switches = ['--disable-'+opt.long_switch]
|
||||
add_option(Option(*switches, **attrs))
|
||||
|
||||
@ -162,6 +164,7 @@ def add_pipeline_options(parser, plumber):
|
||||
'chapter', 'chapter_mark',
|
||||
'prefer_metadata_cover', 'remove_first_image',
|
||||
'insert_metadata', 'page_breaks_before',
|
||||
'remove_fake_margins',
|
||||
]
|
||||
),
|
||||
|
||||
|
@ -304,6 +304,17 @@ OptionRecommendation(name='page_breaks_before',
|
||||
'before the specified elements.')
|
||||
),
|
||||
|
||||
OptionRecommendation(name='remove_fake_margins',
|
||||
recommended_value=True, level=OptionRecommendation.LOW,
|
||||
help=_('Some documents specify page margins by '
|
||||
'specifying a left and right margin on each individual '
|
||||
'paragraph. calibre will try to detect and remove these '
|
||||
'margins. Sometimes, this can cause the removal of '
|
||||
'margins that should not have been removed. In this '
|
||||
'case you can disable the removal.')
|
||||
),
|
||||
|
||||
|
||||
OptionRecommendation(name='margin_top',
|
||||
recommended_value=5.0, level=OptionRecommendation.LOW,
|
||||
help=_('Set the top margin in pts. Default is %default. '
|
||||
@ -988,9 +999,13 @@ OptionRecommendation(name='sr3_replace',
|
||||
page_break_on_body=self.output_plugin.file_type in ('mobi',
|
||||
'lit'))
|
||||
flattener(self.oeb, self.opts)
|
||||
|
||||
self.opts.insert_blank_line = oibl
|
||||
self.opts.remove_paragraph_spacing = orps
|
||||
|
||||
from calibre.ebooks.oeb.transforms.page_margin import RemoveFakeMargins
|
||||
RemoveFakeMargins()(self.oeb, self.log, self.opts)
|
||||
|
||||
pr(0.9)
|
||||
self.flush()
|
||||
|
||||
|
@ -28,11 +28,12 @@ class Worker(Thread): # {{{
|
||||
Get book details from amazons book page in a separate thread
|
||||
'''
|
||||
|
||||
def __init__(self, url, result_queue, browser, log, timeout=20):
|
||||
def __init__(self, url, result_queue, browser, log, relevance, plugin, timeout=20):
|
||||
Thread.__init__(self)
|
||||
self.daemon = True
|
||||
self.url, self.result_queue = url, result_queue
|
||||
self.log, self.timeout = log, timeout
|
||||
self.relevance, self.plugin = relevance, plugin
|
||||
self.browser = browser.clone_browser()
|
||||
self.cover_url = self.amazon_id = self.isbn = None
|
||||
|
||||
@ -161,6 +162,15 @@ class Worker(Thread): # {{{
|
||||
else:
|
||||
self.log.warning('Failed to find product description for url: %r'%self.url)
|
||||
|
||||
mi.source_relevance = self.relevance
|
||||
|
||||
if self.amazon_id:
|
||||
if self.isbn:
|
||||
self.plugin.cache_isbn_to_identifier(self.isbn, self.amazon_id)
|
||||
if self.cover_url:
|
||||
self.cache_identifier_to_cover_url(self.amazon_id,
|
||||
self.cover_url)
|
||||
|
||||
self.result_queue.put(mi)
|
||||
|
||||
def parse_asin(self, root):
|
||||
@ -321,6 +331,20 @@ class Amazon(Source):
|
||||
|
||||
# }}}
|
||||
|
||||
def get_cached_cover_url(self, identifiers):
    '''
    Return the cached cover URL for the book described by ``identifiers``,
    or None.

    The ASIN is looked up under the ``amazon`` key, then under ``asin`` and
    finally through the cached ISBN to identifier mapping.
    '''
    asin = identifiers.get('amazon', None)
    if asin is None:
        asin = identifiers.get('asin', None)
    if asin is None:
        isbn = identifiers.get('isbn', None)
        if isbn is not None:
            asin = self.cached_isbn_to_identifier(isbn)
    if asin is None:
        return None
    return self.cached_identifier_to_cover_url(asin)
|
||||
|
||||
def identify(self, log, result_queue, abort, title=None, authors=None, # {{{
|
||||
identifiers={}, timeout=30):
|
||||
'''
|
||||
@ -396,7 +420,8 @@ class Amazon(Source):
|
||||
log.error('No matches found with query: %r'%query)
|
||||
return
|
||||
|
||||
workers = [Worker(url, result_queue, br, log) for url in matches]
|
||||
workers = [Worker(url, result_queue, br, log, i, self) for i, url in
|
||||
enumerate(matches)]
|
||||
|
||||
for w in workers:
|
||||
w.start()
|
||||
@ -414,14 +439,6 @@ class Amazon(Source):
|
||||
if not a_worker_is_alive:
|
||||
break
|
||||
|
||||
for w in workers:
|
||||
if w.amazon_id:
|
||||
if w.isbn:
|
||||
self.cache_isbn_to_identifier(w.isbn, w.amazon_id)
|
||||
if w.cover_url:
|
||||
self.cache_identifier_to_cover_url(w.amazon_id,
|
||||
w.cover_url)
|
||||
|
||||
return None
|
||||
# }}}
|
||||
|
||||
|
@ -21,6 +21,21 @@ def create_log(ostream=None):
|
||||
log.outputs = [FileStream(ostream)]
|
||||
return log
|
||||
|
||||
# Leading words that are dropped from a title before comparison
words = ("the", "a", "an", "of", "and")
prefix_pat = re.compile(r'^(%s)\s+'%("|".join(words)))
# A parenthesised remark at the very end of the title
trailing_paren_pat = re.compile(r'\(.*\)$')
whitespace_pat = re.compile(r'\s+')

def cleanup_title(s):
    '''
    Normalize a title for comparison: lower case it, drop one leading word
    from ``words``, drop a trailing parenthesised part and collapse runs of
    whitespace. An empty title is treated as _('Unknown').
    '''
    title = (s or _('Unknown')).strip().lower()
    for pat, repl in ((prefix_pat, ' '), (trailing_paren_pat, ''),
            (whitespace_pat, ' ')):
        title = pat.sub(repl, title)
    return title.strip()
|
||||
|
||||
|
||||
class Source(Plugin):
|
||||
|
||||
type = _('Metadata source')
|
||||
@ -128,10 +143,91 @@ class Source(Plugin):
|
||||
gr.append(job)
|
||||
return [g for g in groups if g]
|
||||
|
||||
def test_fields(self, mi):
|
||||
'''
|
||||
Return the first field from self.touched_fields that is null on the
|
||||
mi object
|
||||
'''
|
||||
for key in self.touched_fields:
|
||||
if key.startswith('identifier:'):
|
||||
key = key.partition(':')[-1]
|
||||
if not mi.has_identifier(key):
|
||||
return 'identifier: ' + key
|
||||
elif mi.is_null(key):
|
||||
return key
|
||||
|
||||
|
||||
# }}}
|
||||
|
||||
# Metadata API {{{
|
||||
|
||||
def get_cached_cover_url(self, identifiers):
    '''
    Return cached cover URL for the book identified by
    the identifiers dict or None if no such URL exists.

    This base implementation always returns None.
    '''
    return None
|
||||
|
||||
def compare_identify_results(self, x, y, title=None, authors=None,
|
||||
identifiers={}):
|
||||
'''
|
||||
Method used to sort the results from a call to identify by relevance.
|
||||
Uses the actual query and various heuristics to rank results.
|
||||
Re-implement in your plugin if this generic algorithm is not suitable.
|
||||
Note that this method assumes x and y have a source_relevance
|
||||
attribute.
|
||||
|
||||
one < two iff one is more relevant than two
|
||||
'''
|
||||
# First, guarantee that if the query specifies an ISBN, the result with
|
||||
# the same isbn is the most relevant
|
||||
def isbn_test(mi):
|
||||
return mi.isbn and mi.isbn == identifiers.get('isbn', None)
|
||||
|
||||
def boolcmp(a, b):
|
||||
return -1 if a and not b else 1 if not a and b else 0
|
||||
|
||||
x_has_isbn, y_has_isbn = isbn_test(x), isbn_test(y)
|
||||
result = boolcmp(x_has_isbn, y_has_isbn)
|
||||
if result != 0:
|
||||
return result
|
||||
|
||||
# Now prefer results that have complete metadata over those that don't
|
||||
x_has_all_fields = self.test_fields(x) is None
|
||||
y_has_all_fields = self.test_fields(y) is None
|
||||
|
||||
result = boolcmp(x_has_all_fields, y_has_all_fields)
|
||||
if result != 0:
|
||||
return result
|
||||
|
||||
# Now prefer results whose title matches the search query
|
||||
if title:
|
||||
x_title = cleanup_title(x.title)
|
||||
y_title = cleanup_title(y.title)
|
||||
t = cleanup_title(title)
|
||||
x_has_title, y_has_title = x_title == t, y_title == t
|
||||
result = boolcmp(x_has_title, y_has_title)
|
||||
if result != 0:
|
||||
return result
|
||||
|
||||
# Now prefer results with the longer comments, within 10%
|
||||
cx = len(x.comments.strip() if x.comments else '')
|
||||
cy = len(y.comments.strip() if y.comments else '')
|
||||
t = (cx + cy) / 20
|
||||
result = cy - cx
|
||||
if result != 0 and abs(cx - cy) > t:
|
||||
return result
|
||||
|
||||
# Now prefer results with cached cover URLs
|
||||
x_has_cover = self.get_cached_cover_url(x.identifiers) is not None
|
||||
y_has_cover = self.get_cached_cover_url(y.identifiers) is not None
|
||||
result = boolcmp(x_has_cover, y_has_cover)
|
||||
if result != 0:
|
||||
return result
|
||||
|
||||
# Now use the relevance reported by the remote search engine
|
||||
return x.source_relevance - y.source_relevance
|
||||
|
||||
def identify(self, log, result_queue, abort, title=None, authors=None,
|
||||
identifiers={}, timeout=5):
|
||||
'''
|
||||
@ -147,6 +243,15 @@ class Source(Plugin):
|
||||
the same ISBN/special identifier does not need to get the cover URL
|
||||
again. Use the caching API for this.
|
||||
|
||||
Every Metadata object put into result_queue by this method must have a
|
||||
`source_relevance` attribute that is an integer indicating the order in
|
||||
which the results were returned by the metadata source for this query.
|
||||
This integer will be used by :meth:`compare_identify_results`. If the
|
||||
order is unimportant, set it to zero for every result.
|
||||
|
||||
Make sure that any cover/isbn mapping information is cached before the
|
||||
Metadata object is put into result_queue.
|
||||
|
||||
:param log: A log object, use it to output debugging information/errors
|
||||
:param result_queue: A result Queue, results should be put into it.
|
||||
Each result is a Metadata object
|
||||
|
@ -190,14 +190,15 @@ class GoogleBooks(Source):
|
||||
return raw and len(raw) > 17000 and raw[1:4] != 'PNG'
|
||||
|
||||
def get_all_details(self, br, log, entries, abort, result_queue, timeout):
|
||||
for i in entries:
|
||||
for relevance, i in enumerate(entries):
|
||||
try:
|
||||
ans = to_metadata(br, log, i, timeout)
|
||||
if isinstance(ans, Metadata):
|
||||
result_queue.put(ans)
|
||||
ans.source_relevance = relevance
|
||||
for isbn in getattr(ans, 'all_isbns', []):
|
||||
self.cache_isbn_to_identifier(isbn,
|
||||
ans.identifiers['google'])
|
||||
result_queue.put(ans)
|
||||
except:
|
||||
log.exception(
|
||||
'Failed to get metadata for identify entry:',
|
||||
|
@ -46,15 +46,6 @@ def authors_test(authors):
|
||||
|
||||
return test
|
||||
|
||||
def _test_fields(touched_fields, mi):
|
||||
for key in touched_fields:
|
||||
if key.startswith('identifier:'):
|
||||
key = key.partition(':')[-1]
|
||||
if not mi.has_identifier(key):
|
||||
return 'identifier: ' + key
|
||||
elif mi.is_null(key):
|
||||
return key
|
||||
|
||||
|
||||
def test_identify_plugin(name, tests):
|
||||
'''
|
||||
@ -120,11 +111,10 @@ def test_identify_plugin(name, tests):
|
||||
prints('Log saved to', lf)
|
||||
raise SystemExit(1)
|
||||
|
||||
good = [x for x in possibles if _test_fields(plugin.touched_fields, x) is
|
||||
good = [x for x in possibles if plugin.test_fields(x) is
|
||||
None]
|
||||
if not good:
|
||||
prints('Failed to find', _test_fields(plugin.touched_fields,
|
||||
possibles[0]))
|
||||
prints('Failed to find', plugin.test_fields(possibles[0]))
|
||||
raise SystemExit(1)
|
||||
|
||||
|
||||
|
153
src/calibre/ebooks/oeb/transforms/page_margin.py
Normal file
153
src/calibre/ebooks/oeb/transforms/page_margin.py
Normal file
@ -0,0 +1,153 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import (unicode_literals, division, absolute_import,
|
||||
print_function)
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
from collections import Counter
|
||||
|
||||
from calibre.ebooks.oeb.base import OEB_STYLES, barename, XPath
|
||||
|
||||
class RemoveFakeMargins(object):

    '''
    Remove left and right margins from paragraphs/divs if the same margin is
    specified on almost all the elements at that level.

    Must be called only after CSS flattening
    '''

    # Groups of <p> (and of <div> nested deeper than level 2) with fewer
    # elements than this are ignored.
    MIN_LEVEL_SIZE = 25
    # A <div> at level 1 or 2 is only considered if it has at least this many
    # descendant <p>/<div> elements.
    MIN_CHILD_PARAS = 5
    # Fraction of the elements in a group that must share a margin value
    # (strictly greater than) for that margin to be removed.
    THRESHOLD = 0.95

    def __call__(self, oeb, log, opts):
        '''
        Run the transform.

        Returns immediately when ``opts.remove_fake_margins`` is false or when
        the manifest has no item whose media type is in OEB_STYLES. Otherwise
        the style rules of the first such item are indexed by selector text,
        the spine is scanned by :meth:`find_levels` and every level found is
        handed to :meth:`process_level`.

        :param oeb: The book; its ``manifest`` and ``spine`` are read
        :param log: Callable log object that also provides ``debug()``
        :param opts: Conversion options, ``remove_fake_margins`` is read
        '''
        if not opts.remove_fake_margins:
            return
        self.oeb, self.log, self.opts = oeb, log, opts
        self.levels = {}
        self.stats = {}
        self.selector_map = {}

        stylesheet = None
        for item in self.oeb.manifest:
            if item.media_type.lower() in OEB_STYLES:
                stylesheet = item
                break
        if stylesheet is None:
            return

        self.log('Removing fake margins...')

        sheet = stylesheet.data

        from cssutils.css import CSSRule
        for rule in sheet.cssRules.rulesOfType(CSSRule.STYLE_RULE):
            self.selector_map[rule.selectorList.selectorText] = rule.style

        self.find_levels()

        for level in self.levels:
            self.process_level(level)

    def get_margins(self, elem):
        '''
        Return ``(margin-left, margin-right, style)`` for elem.

        The style is looked up in ``self.selector_map`` under the selector
        ``'.' + class attribute``. If elem has no class, or no (truthy) style
        is registered for it, ``('', '', None)`` is returned.
        '''
        cls = elem.get('class', None)
        if cls:
            style = self.selector_map.get('.'+cls, None)
            if style:
                return style.marginLeft, style.marginRight, style
        return '', '', None

    def process_level(self, level):
        '''
        Gather margin statistics for the elements stored under
        ``self.levels[level]`` and remove the dominant left and/or right
        margin from the matching styles when :meth:`analyze_stats` approves.

        The counters are kept in ``self.stats`` under ``level + '_left'`` and
        ``level + '_right'``.
        '''
        elems = self.levels[level]
        left = self.stats[level+'_left'] = Counter()
        right = self.stats[level+'_right'] = Counter()

        for elem in elems:
            lm, rm = self.get_margins(elem)[:2]
            left[lm] += 1
            right[rm] += 1

        self.log.debug(level, ' left margin stats:', left)
        self.log.debug(level, ' right margin stats:', right)

        remove_left = self.analyze_stats(left)
        remove_right = self.analyze_stats(right)
        mcl = mcr = None

        if remove_left:
            mcl = left.most_common(1)[0][0]
            self.log('Removing level %s left margin of:'%level, mcl)

        if remove_right:
            mcr = right.most_common(1)[0][0]
            self.log('Removing level %s right margin of:'%level, mcr)

        if not (remove_left or remove_right):
            return

        for elem in elems:
            # Margins are re-read for every element: a style shared by
            # several elements may already have been changed by an earlier
            # iteration of this loop.
            lm, rm, style = self.get_margins(elem)
            if remove_left and lm == mcl:
                style.removeProperty('margin-left')
            if remove_right and rm == mcr:
                style.removeProperty('margin-right')

    def find_levels(self):
        '''
        Populate ``self.levels`` with the <p> and <div> elements of every
        spine item, grouped under keys of the form ``'<tag>_<depth>'`` where
        depth is 1 for direct children of <body>.

        Afterwards groups that are too small are dropped (see MIN_LEVEL_SIZE)
        and, for <div> groups at depth 1 or 2, only the divs with at least
        MIN_CHILD_PARAS descendant <p>/<div> elements are kept.
        '''

        def level_of(elem, body):
            # Number of steps from elem up to body
            ans = 1
            while elem.getparent() is not body:
                ans += 1
                elem = elem.getparent()
            return ans

        paras = XPath('descendant::h:p|descendant::h:div')
        find_body = XPath('//h:body')

        for item in self.oeb.spine:
            body = find_body(item.data)
            if not body:
                continue
            body = body[0]

            for p in paras(body):
                key = '%s_%d'%(barename(p.tag), level_of(p, body))
                self.levels.setdefault(key, []).append(p)

        remove = set()
        # Only the value lists are modified while iterating, never the dict
        # itself; keys are removed in the separate loop below.
        for k, v in self.levels.items():
            num = len(v)
            self.log.debug('Found %d items of level:'%num, k)
            level = int(k.split('_')[-1])
            tag = k.split('_')[0]
            if tag == 'p' and num < self.MIN_LEVEL_SIZE:
                remove.add(k)
            if tag == 'div':
                if level > 2 and num < self.MIN_LEVEL_SIZE:
                    remove.add(k)
                elif level < 3:
                    # Check each level < 3 element and only keep those
                    # that have many descendant paras (filtered in place so
                    # self.levels keeps referring to the same list)
                    v[:] = [elem for elem in v
                            if len(paras(elem)) >= self.MIN_CHILD_PARAS]

        for k in remove:
            self.levels.pop(k)
            self.log.debug('Ignoring level', k)

    def analyze_stats(self, stats):
        '''
        Decide whether the most common margin value in stats should be
        removed.

        :param stats: Counter mapping margin value -> number of elements
        :return: True only if the most common value is neither empty nor
                 ``'0'`` and is used by more than THRESHOLD of all elements
        '''
        if not stats:
            return False
        most_common, most_common_count = stats.most_common(1)[0]
        if not most_common or most_common == '0':
            return False
        total = sum(stats.values())
        # float() keeps this a true division even without
        # ``from __future__ import division``
        return most_common_count/float(total) > self.THRESHOLD
|
@ -22,6 +22,7 @@ border_style_map = {
|
||||
'dot-dot-dash': 'dotted',
|
||||
'outset': 'outset',
|
||||
'tripple': 'double',
|
||||
'triple': 'double',
|
||||
'thick-thin-small': 'solid',
|
||||
'thin-thick-small': 'solid',
|
||||
'thin-thick-thin-small': 'solid',
|
||||
|
@ -121,97 +121,108 @@ class Textile(object):
|
||||
btag = ('bq', 'bc', 'notextile', 'pre', 'h[1-6]', 'fn\d+', 'p')
|
||||
btag_lite = ('bq', 'bc', 'p')
|
||||
|
||||
glyph_defaults = (
|
||||
('mac_cent', '¢'),
|
||||
('mac_pound', '£'),
|
||||
('mac_yen', '¥'),
|
||||
('mac_quarter', '¼'),
|
||||
('mac_half', '½'),
|
||||
('mac_three-quarter', '¾'),
|
||||
('mac_cA-grave', 'À'),
|
||||
('mac_cA-acute', 'Á'),
|
||||
('mac_cA-circumflex', 'Â'),
|
||||
('mac_cA-tilde', 'Ã'),
|
||||
('mac_cA-diaeresis', 'Ä'),
|
||||
('mac_cA-ring', 'Å'),
|
||||
('mac_cAE', 'Æ'),
|
||||
('mac_cC-cedilla', 'Ç'),
|
||||
('mac_cE-grave', 'È'),
|
||||
('mac_cE-acute', 'É'),
|
||||
('mac_cE-circumflex', 'Ê'),
|
||||
('mac_cE-diaeresis', 'Ë'),
|
||||
('mac_cI-grave', 'Ì'),
|
||||
('mac_cI-acute', 'Í'),
|
||||
('mac_cI-circumflex', 'Î'),
|
||||
('mac_cI-diaeresis', 'Ï'),
|
||||
('mac_cEth', 'Ð'),
|
||||
('mac_cN-tilde', 'Ñ'),
|
||||
('mac_cO-grave', 'Ò'),
|
||||
('mac_cO-acute', 'Ó'),
|
||||
('mac_cO-circumflex', 'Ô'),
|
||||
('mac_cO-tilde', 'Õ'),
|
||||
('mac_cO-diaeresis', 'Ö'),
|
||||
('mac_cO-stroke', 'Ø'),
|
||||
('mac_cU-grave', 'Ù'),
|
||||
('mac_cU-acute', 'Ú'),
|
||||
('mac_cU-circumflex', 'Û'),
|
||||
('mac_cU-diaeresis', 'Ü'),
|
||||
('mac_cY-acute', 'Ý'),
|
||||
('mac_sa-grave', 'à'),
|
||||
('mac_sa-acute', 'á'),
|
||||
('mac_sa-circumflex', 'â'),
|
||||
('mac_sa-tilde', 'ã'),
|
||||
('mac_sa-diaeresis', 'ä'),
|
||||
('mac_sa-ring', 'å'),
|
||||
('mac_sae', 'æ'),
|
||||
('mac_sc-cedilla', 'ç'),
|
||||
('mac_se-grave', 'è'),
|
||||
('mac_se-acute', 'é'),
|
||||
('mac_se-circumflex', 'ê'),
|
||||
('mac_se-diaeresis', 'ë'),
|
||||
('mac_si-grave', 'ì'),
|
||||
('mac_si-acute', 'í'),
|
||||
('mac_si-circumflex', 'î'),
|
||||
('mac_si-diaeresis', 'ï'),
|
||||
('mac_sn-tilde', 'ñ'),
|
||||
('mac_so-grave', 'ò'),
|
||||
('mac_so-acute', 'ó'),
|
||||
('mac_so-circumflex', 'ô'),
|
||||
('mac_so-tilde', 'õ'),
|
||||
('mac_so-diaeresis', 'ö'),
|
||||
('mac_so-stroke', 'ø'),
|
||||
('mac_su-grave', 'ù'),
|
||||
('mac_su-acute', 'ú'),
|
||||
('mac_su-circumflex', 'û'),
|
||||
('mac_su-diaeresis', 'ü'),
|
||||
('mac_sy-acute', 'ý'),
|
||||
('mac_sy-diaeresis', 'ÿ'),
|
||||
('mac_cOE', 'Œ'),
|
||||
('mac_soe', 'œ'),
|
||||
('mac_bullet', '•'),
|
||||
('mac_franc', '₣'),
|
||||
('mac_lira', '₤'),
|
||||
('mac_rupee', '₨'),
|
||||
('mac_euro', '€'),
|
||||
('mac_spade', '♠'),
|
||||
('mac_club', '♣'),
|
||||
('mac_heart', '♥'),
|
||||
('mac_diamond', '♦'),
|
||||
('txt_dimension', '×'),
|
||||
('txt_quote_single_open', '‘'),
|
||||
('txt_quote_single_close', '’'),
|
||||
('txt_quote_double_open', '“'),
|
||||
('txt_quote_double_close', '”'),
|
||||
('txt_apostrophe', '’'),
|
||||
('txt_prime', '′'),
|
||||
('txt_prime_double', '″'),
|
||||
('txt_ellipsis', '…'),
|
||||
('txt_emdash', '—'),
|
||||
('txt_endash', '–'),
|
||||
('txt_trademark', '™'),
|
||||
('txt_registered', '®'),
|
||||
('txt_copyright', '©'),
|
||||
)
|
||||
# (compiled pattern, replacement) pairs for brace-delimited macros such as {c|} or {1/2}
macro_defaults = [
|
||||
(re.compile(r'{(c\||\|c)}'), r'¢'), # cent
|
||||
(re.compile(r'{(L-|-L)}'), r'£'), # pound
|
||||
(re.compile(r'{(Y=|=Y)}'), r'¥'), # yen
|
||||
(re.compile(r'{\(c\)}'), r'©'), # copyright
|
||||
(re.compile(r'{\(r\)}'), r'®'), # registered
|
||||
(re.compile(r'{(\+_|_\+)}'), r'±'), # plus-minus
|
||||
(re.compile(r'{1/4}'), r'¼'), # quarter
|
||||
(re.compile(r'{1/2}'), r'½'), # half
|
||||
(re.compile(r'{3/4}'), r'¾'), # three-quarter
|
||||
(re.compile(r'{(A`|`A)}'), r'À'), # A-grave
|
||||
(re.compile(r'{(A\'|\'A)}'), r'Á'), # A-acute
|
||||
(re.compile(r'{(A\^|\^A)}'), r'Â'), # A-circumflex
|
||||
(re.compile(r'{(A~|~A)}'), r'Ã'), # A-tilde
|
||||
(re.compile(r'{(A\"|\"A)}'), r'Ä'), # A-diaeresis
|
||||
(re.compile(r'{(Ao|oA)}'), r'Å'), # A-ring
|
||||
(re.compile(r'{(AE)}'), r'Æ'), # AE
|
||||
(re.compile(r'{(C,|,C)}'), r'Ç'), # C-cedilla
|
||||
(re.compile(r'{(E`|`E)}'), r'È'), # E-grave
|
||||
(re.compile(r'{(E\'|\'E)}'), r'É'), # E-acute
|
||||
(re.compile(r'{(E\^|\^E)}'), r'Ê'), # E-circumflex
|
||||
(re.compile(r'{(E\"|\"E)}'), r'Ë'), # E-diaeresis
|
||||
(re.compile(r'{(I`|`I)}'), r'Ì'), # I-grave
|
||||
(re.compile(r'{(I\'|\'I)}'), r'Í'), # I-acute
|
||||
(re.compile(r'{(I\^|\^I)}'), r'Î'), # I-circumflex
|
||||
(re.compile(r'{(I\"|\"I)}'), r'Ï'), # I-diaeresis
|
||||
(re.compile(r'{(D-|-D)}'), r'Ð'), # ETH
|
||||
(re.compile(r'{(N~|~N)}'), r'Ñ'), # N-tilde
|
||||
(re.compile(r'{(O`|`O)}'), r'Ò'), # O-grave
|
||||
(re.compile(r'{(O\'|\'O)}'), r'Ó'), # O-acute
|
||||
(re.compile(r'{(O\^|\^O)}'), r'Ô'), # O-circumflex
|
||||
(re.compile(r'{(O~|~O)}'), r'Õ'), # O-tilde
|
||||
(re.compile(r'{(O\"|\"O)}'), r'Ö'), # O-diaeresis
|
||||
(re.compile(r'{x}'), r'×'), # dimension
|
||||
(re.compile(r'{(O\/|\/O)}'), r'Ø'), # O-slash
|
||||
(re.compile(r'{(U`|`U)}'), r'Ù'), # U-grave
|
||||
(re.compile(r'{(U\'|\'U)}'), r'Ú'), # U-acute
|
||||
(re.compile(r'{(U\^|\^U)}'), r'Û'), # U-circumflex
|
||||
(re.compile(r'{(U\"|\"U)}'), r'Ü'), # U-diaeresis
|
||||
(re.compile(r'{(Y\'|\'Y)}'), r'Ý'), # Y-acute
|
||||
(re.compile(r'{sz}'), r'ß'), # sharp-s
|
||||
(re.compile(r'{(a`|`a)}'), r'à'), # a-grave
|
||||
(re.compile(r'{(a\'|\'a)}'), r'á'), # a-acute
|
||||
(re.compile(r'{(a\^|\^a)}'), r'â'), # a-circumflex
|
||||
(re.compile(r'{(a~|~a)}'), r'ã'), # a-tilde
|
||||
(re.compile(r'{(a\"|\"a)}'), r'ä'), # a-diaeresis
|
||||
(re.compile(r'{(ao|oa)}'), r'å'), # a-ring
|
||||
(re.compile(r'{ae}'), r'æ'), # ae
|
||||
(re.compile(r'{(c,|,c)}'), r'ç'), # c-cedilla
|
||||
(re.compile(r'{(e`|`e)}'), r'è'), # e-grave
|
||||
(re.compile(r'{(e\'|\'e)}'), r'é'), # e-acute
|
||||
(re.compile(r'{(e\^|\^e)}'), r'ê'), # e-circumflex
|
||||
(re.compile(r'{(e\"|\"e)}'), r'ë'), # e-diaeresis
|
||||
(re.compile(r'{(i`|`i)}'), r'ì'), # i-grave
|
||||
(re.compile(r'{(i\'|\'i)}'), r'í'), # i-acute
|
||||
(re.compile(r'{(i\^|\^i)}'), r'î'), # i-circumflex
|
||||
(re.compile(r'{(i\"|\"i)}'), r'ï'), # i-diaeresis
|
||||
(re.compile(r'{(d-|-d)}'), r'ð'), # eth
|
||||
(re.compile(r'{(n~|~n)}'), r'ñ'), # n-tilde
|
||||
(re.compile(r'{(o`|`o)}'), r'ò'), # o-grave
|
||||
(re.compile(r'{(o\'|\'o)}'), r'ó'), # o-acute
|
||||
(re.compile(r'{(o\^|\^o)}'), r'ô'), # o-circumflex
|
||||
(re.compile(r'{(o~|~o)}'), r'õ'), # o-tilde
|
||||
(re.compile(r'{(o\"|\"o)}'), r'ö'), # o-diaeresis
|
||||
(re.compile(r'{(o\/|\/o)}'), r'ø'), # o-stroke
|
||||
(re.compile(r'{(u`|`u)}'), r'ù'), # u-grave
|
||||
(re.compile(r'{(u\'|\'u)}'), r'ú'), # u-acute
|
||||
(re.compile(r'{(u\^|\^u)}'), r'û'), # u-circumflex
|
||||
(re.compile(r'{(u\"|\"u)}'), r'ü'), # u-diaeresis
|
||||
(re.compile(r'{(y\'|\'y)}'), r'ý'), # y-acute
|
||||
(re.compile(r'{(y\"|\"y)}'), r'ÿ'), # y-diaeresis
|
||||
(re.compile(r'{OE}'), r'Œ'), # OE
|
||||
(re.compile(r'{oe}'), r'œ'), # oe
|
||||
(re.compile(r'{(S\^|\^S)}'), r'Š'), # Scaron
|
||||
(re.compile(r'{(s\^|\^s)}'), r'š'), # scaron
|
||||
(re.compile(r'{\*}'), r'•'), # bullet
|
||||
(re.compile(r'{Fr}'), r'₣'), # Franc
|
||||
(re.compile(r'{(L=|=L)}'), r'₤'), # Lira
|
||||
(re.compile(r'{Rs}'), r'₨'), # Rupee
|
||||
(re.compile(r'{(C=|=C)}'), r'€'), # euro
|
||||
(re.compile(r'{tm}'), r'™'), # trademark
|
||||
(re.compile(r'{spades?}'), r'♠'), # spade
|
||||
(re.compile(r'{clubs?}'), r'♣'), # club
|
||||
(re.compile(r'{hearts?}'), r'♥'), # heart
|
||||
(re.compile(r'{diam(onds?|s)}'), r'♦'), # diamond
|
||||
]
|
||||
glyph_defaults = [
|
||||
(re.compile(r'(\d+\'?\"?)( ?)x( ?)(?=\d+)'), r'\1\2×\3'), # dimension sign
|
||||
(re.compile(r'(\d+)\'', re.I), r'\1′'), # prime
|
||||
(re.compile(r'(\d+)\"', re.I), r'\1″'), # prime-double
|
||||
(re.compile(r'\b([A-Z][A-Z0-9]{2,})\b(?:[(]([^)]*)[)])'), r'<acronym title="\2">\1</acronym>'), # 3+ uppercase acronym
|
||||
(re.compile(r'\b([A-Z][A-Z\'\-]+[A-Z])(?=[\s.,\)>])'), r'<span class="caps">\1</span>'), # 3+ uppercase
|
||||
(re.compile(r'\b(\s{0,1})?\.{3}'), r'\1…'), # ellipsis
|
||||
(re.compile(r'^[\*_-]{3,}$', re.M), r'<hr />'), # <hr> scene-break
|
||||
(re.compile(r'\b--\b'), r'—'), # em dash
|
||||
(re.compile(r'(\s)--(\s)'), r'\1—\2'), # em dash
|
||||
(re.compile(r'\s-(?:\s|$)'), r' – '), # en dash
|
||||
(re.compile(r'\b( ?)[([]TM[])]', re.I), r'\1™'), # trademark
|
||||
(re.compile(r'\b( ?)[([]R[])]', re.I), r'\1®'), # registered
|
||||
(re.compile(r'\b( ?)[([]C[])]', re.I), r'\1©'), # copyright
|
||||
]
|
||||
|
||||
|
||||
def __init__(self, restricted=False, lite=False, noimage=False):
|
||||
"""docstring for __init__"""
|
||||
@ -673,211 +684,15 @@ class Textile(object):
|
||||
# fix: hackish
|
||||
text = re.sub(r'"\Z', '\" ', text)
|
||||
|
||||
glyph_search = (
|
||||
re.compile(r'(\d+\'?\"?)( ?)x( ?)(?=\d+)'), # dimension sign
|
||||
re.compile(r"(\w)\'(\w)"), # apostrophe's
|
||||
re.compile(r'(\s)\'(\d+\w?)\b(?!\')'), # back in '88
|
||||
re.compile(r'(\S)\'(?=\s|'+self.pnct+'|<|$)'), # single closing
|
||||
re.compile(r'\'/'), # single opening
|
||||
re.compile(r'(\")\"'), # double closing - following another
|
||||
re.compile(r'(\S)\"(?=\s|'+self.pnct+'|<|$)'), # double closing
|
||||
re.compile(r'"'), # double opening
|
||||
re.compile(r'\b([A-Z][A-Z0-9]{2,})\b(?:[(]([^)]*)[)])'), # 3+ uppercase acronym
|
||||
re.compile(r'\b([A-Z][A-Z\'\-]+[A-Z])(?=[\s.,\)>])'), # 3+ uppercase
|
||||
re.compile(r'\b(\s{0,1})?\.{3}'), # ellipsis
|
||||
re.compile(r'(\s?)--(\s?)'), # em dash
|
||||
re.compile(r'\s-(?:\s|$)'), # en dash
|
||||
re.compile(r'\b( ?)[([]TM[])]', re.I), # trademark
|
||||
re.compile(r'\b( ?)[([]R[])]', re.I), # registered
|
||||
re.compile(r'\b( ?)[([]C[])]', re.I) # copyright
|
||||
)
|
||||
|
||||
glyph_replace = [x % dict(self.glyph_defaults) for x in (
|
||||
r'\1\2%(txt_dimension)s\3', # dimension sign
|
||||
r'\1%(txt_apostrophe)s\2', # apostrophe's
|
||||
r'\1%(txt_apostrophe)s\2', # back in '88
|
||||
r'\1%(txt_quote_single_close)s', # single closing
|
||||
r'%(txt_quote_single_open)s', # single opening
|
||||
r'\1%(txt_quote_double_close)s', # double closing - following another
|
||||
r'\1%(txt_quote_double_close)s', # double closing
|
||||
r'%(txt_quote_double_open)s', # double opening
|
||||
r'<acronym title="\2">\1</acronym>', # 3+ uppercase acronym
|
||||
r'<span class="caps">\1</span>', # 3+ uppercase
|
||||
r'\1%(txt_ellipsis)s', # ellipsis
|
||||
r'\1%(txt_emdash)s\2', # em dash
|
||||
r' %(txt_endash)s ', # en dash
|
||||
r'\1%(txt_trademark)s', # trademark
|
||||
r'\1%(txt_registered)s', # registered
|
||||
r'\1%(txt_copyright)s' # copyright
|
||||
)]
|
||||
|
||||
if re.search(r'{.+?}', text):
|
||||
glyph_search += (
|
||||
re.compile(r'{(c\||\|c)}'), # cent
|
||||
re.compile(r'{(L-|-L)}'), # pound
|
||||
re.compile(r'{(Y=|=Y)}'), # yen
|
||||
re.compile(r'{\(c\)}'), # copyright
|
||||
re.compile(r'{\(r\)}'), # registered
|
||||
re.compile(r'{1/4}'), # quarter
|
||||
re.compile(r'{1/2}'), # half
|
||||
re.compile(r'{3/4}'), # three-quarter
|
||||
re.compile(r'{(A`|`A)}'), # 192;
|
||||
re.compile(r'{(A\'|\'A)}'), # 193;
|
||||
re.compile(r'{(A\^|\^A)}'), # 194;
|
||||
re.compile(r'{(A~|~A)}'), # 195;
|
||||
re.compile(r'{(A\"|\"A)}'), # 196;
|
||||
re.compile(r'{(Ao|oA)}'), # 197;
|
||||
re.compile(r'{(AE)}'), # 198;
|
||||
re.compile(r'{(C,|,C)}'), # 199;
|
||||
re.compile(r'{(E`|`E)}'), # 200;
|
||||
re.compile(r'{(E\'|\'E)}'), # 201;
|
||||
re.compile(r'{(E\^|\^E)}'), # 202;
|
||||
re.compile(r'{(E\"|\"E)}'), # 203;
|
||||
re.compile(r'{(I`|`I)}'), # 204;
|
||||
re.compile(r'{(I\'|\'I)}'), # 205;
|
||||
re.compile(r'{(I\^|\^I)}'), # 206;
|
||||
re.compile(r'{(I\"|\"I)}'), # 207;
|
||||
re.compile(r'{(D-|-D)}'), # 208;
|
||||
re.compile(r'{(N~|~N)}'), # 209;
|
||||
re.compile(r'{(O`|`O)}'), # 210;
|
||||
re.compile(r'{(O\'|\'O)}'), # 211;
|
||||
re.compile(r'{(O\^|\^O)}'), # 212;
|
||||
re.compile(r'{(O~|~O)}'), # 213;
|
||||
re.compile(r'{(O\"|\"O)}'), # 214;
|
||||
re.compile(r'{(O\/|\/O)}'), # 215;
|
||||
re.compile(r'{(U`|`U)}'), # 216;
|
||||
re.compile(r'{(U\'|\'U)}'), # 217;
|
||||
re.compile(r'{(U\^|\^U)}'), # 218;
|
||||
re.compile(r'{(U\"|\"U)}'), # 219;
|
||||
re.compile(r'{(Y\'|\'Y)}'), # 220;
|
||||
re.compile(r'{(a`|`a)}'), # a-grace
|
||||
re.compile(r'{(a\'|\'a)}'), # a-acute
|
||||
re.compile(r'{(a\^|\^a)}'), # a-circumflex
|
||||
re.compile(r'{(a~|~a)}'), # a-tilde
|
||||
re.compile(r'{(a\"|\"a)}'), # a-diaeresis
|
||||
re.compile(r'{(ao|oa)}'), # a-ring
|
||||
re.compile(r'{ae}'), # ae
|
||||
re.compile(r'{(c,|,c)}'), # c-cedilla
|
||||
re.compile(r'{(e`|`e)}'), # e-grace
|
||||
re.compile(r'{(e\'|\'e)}'), # e-acute
|
||||
re.compile(r'{(e\^|\^e)}'), # e-circumflex
|
||||
re.compile(r'{(e\"|\"e)}'), # e-diaeresis
|
||||
re.compile(r'{(i`|`i)}'), # i-grace
|
||||
re.compile(r'{(i\'|\'i)}'), # i-acute
|
||||
re.compile(r'{(i\^|\^i)}'), # i-circumflex
|
||||
re.compile(r'{(i\"|\"i)}'), # i-diaeresis
|
||||
re.compile(r'{(n~|~n)}'), # n-tilde
|
||||
re.compile(r'{(o`|`o)}'), # o-grace
|
||||
re.compile(r'{(o\'|\'o)}'), # o-acute
|
||||
re.compile(r'{(o\^|\^o)}'), # o-circumflex
|
||||
re.compile(r'{(o~|~o)}'), # o-tilde
|
||||
re.compile(r'{(o\"|\"o)}'), # o-diaeresis
|
||||
re.compile(r'{(o\/|\/o)}'), # o-stroke
|
||||
re.compile(r'{(u`|`u)}'), # u-grace
|
||||
re.compile(r'{(u\'|\'u)}'), # u-acute
|
||||
re.compile(r'{(u\^|\^u)}'), # u-circumflex
|
||||
re.compile(r'{(u\"|\"u)}'), # u-diaeresis
|
||||
re.compile(r'{(y\'|\'y)}'), # y-acute
|
||||
re.compile(r'{(y\"|\"y)}'), # y-diaeresis
|
||||
re.compile(r'{OE}'), # y-diaeresis
|
||||
re.compile(r'{oe}'), # y-diaeresis
|
||||
re.compile(r'{\*}'), # bullet
|
||||
re.compile(r'{Fr}'), # Franc
|
||||
re.compile(r'{(L=|=L)}'), # Lira
|
||||
re.compile(r'{Rs}'), # Rupee
|
||||
re.compile(r'{(C=|=C)}'), # euro
|
||||
re.compile(r'{tm}'), # euro
|
||||
re.compile(r'{spade}'), # spade
|
||||
re.compile(r'{club}'), # club
|
||||
re.compile(r'{heart}'), # heart
|
||||
re.compile(r'{diamond}') # diamond
|
||||
)
|
||||
|
||||
glyph_replace += [x % dict(self.glyph_defaults) for x in (
|
||||
r'%(mac_cent)s', # cent
|
||||
r'%(mac_pound)s', # pound
|
||||
r'%(mac_yen)s', # yen
|
||||
r'%(txt_copyright)s', # copyright
|
||||
r'%(txt_registered)s', # registered
|
||||
r'%(mac_quarter)s', # quarter
|
||||
r'%(mac_half)s', # half
|
||||
r'%(mac_three-quarter)s', # three-quarter
|
||||
r'%(mac_cA-grave)s', # 192;
|
||||
r'%(mac_cA-acute)s', # 193;
|
||||
r'%(mac_cA-circumflex)s', # 194;
|
||||
r'%(mac_cA-tilde)s', # 195;
|
||||
r'%(mac_cA-diaeresis)s', # 196;
|
||||
r'%(mac_cA-ring)s', # 197;
|
||||
r'%(mac_cAE)s', # 198;
|
||||
r'%(mac_cC-cedilla)s', # 199;
|
||||
r'%(mac_cE-grave)s', # 200;
|
||||
r'%(mac_cE-acute)s', # 201;
|
||||
r'%(mac_cE-circumflex)s', # 202;
|
||||
r'%(mac_cE-diaeresis)s', # 203;
|
||||
r'%(mac_cI-grave)s', # 204;
|
||||
r'%(mac_cI-acute)s', # 205;
|
||||
r'%(mac_cI-circumflex)s', # 206;
|
||||
r'%(mac_cI-diaeresis)s', # 207;
|
||||
r'%(mac_cEth)s', # 208;
|
||||
r'%(mac_cN-tilde)s', # 209;
|
||||
r'%(mac_cO-grave)s', # 210;
|
||||
r'%(mac_cO-acute)s', # 211;
|
||||
r'%(mac_cO-circumflex)s', # 212;
|
||||
r'%(mac_cO-tilde)s', # 213;
|
||||
r'%(mac_cO-diaeresis)s', # 214;
|
||||
r'%(mac_cO-stroke)s', # 216;
|
||||
r'%(mac_cU-grave)s', # 217;
|
||||
r'%(mac_cU-acute)s', # 218;
|
||||
r'%(mac_cU-circumflex)s', # 219;
|
||||
r'%(mac_cU-diaeresis)s', # 220;
|
||||
r'%(mac_cY-acute)s', # 221;
|
||||
r'%(mac_sa-grave)s', # 224;
|
||||
r'%(mac_sa-acute)s', # 225;
|
||||
r'%(mac_sa-circumflex)s', # 226;
|
||||
r'%(mac_sa-tilde)s', # 227;
|
||||
r'%(mac_sa-diaeresis)s', # 228;
|
||||
r'%(mac_sa-ring)s', # 229;
|
||||
r'%(mac_sae)s', # 230;
|
||||
r'%(mac_sc-cedilla)s', # 231;
|
||||
r'%(mac_se-grave)s', # 232;
|
||||
r'%(mac_se-acute)s', # 233;
|
||||
r'%(mac_se-circumflex)s', # 234;
|
||||
r'%(mac_se-diaeresis)s', # 235;
|
||||
r'%(mac_si-grave)s', # 236;
|
||||
r'%(mac_si-acute)s', # 237;
|
||||
r'%(mac_si-circumflex)s', # 238;
|
||||
r'%(mac_si-diaeresis)s', # 239;
|
||||
r'%(mac_sn-tilde)s', # 241;
|
||||
r'%(mac_so-grave)s', # 242;
|
||||
r'%(mac_so-acute)s', # 243;
|
||||
r'%(mac_so-circumflex)s', # 244;
|
||||
r'%(mac_so-tilde)s', # 245;
|
||||
r'%(mac_so-diaeresis)s', # 246;
|
||||
r'%(mac_so-stroke)s', # 248;
|
||||
r'%(mac_su-grave)s', # 249;
|
||||
r'%(mac_su-acute)s', # 250;
|
||||
r'%(mac_su-circumflex)s', # 251;
|
||||
r'%(mac_su-diaeresis)s', # 252;
|
||||
r'%(mac_sy-acute)s', # 253;
|
||||
r'%(mac_sy-diaeresis)s', # 255;
|
||||
r'%(mac_cOE)s', # 338;
|
||||
r'%(mac_soe)s', # 339;
|
||||
r'%(mac_bullet)s', # bullet
|
||||
r'%(mac_franc)s', # franc
|
||||
r'%(mac_lira)s', # lira
|
||||
r'%(mac_rupee)s', # rupee
|
||||
r'%(mac_euro)s', # euro
|
||||
r'%(txt_trademark)s', # trademark
|
||||
r'%(mac_spade)s', # spade
|
||||
r'%(mac_club)s', # club
|
||||
r'%(mac_heart)s', # heart
|
||||
r'%(mac_diamond)s' # diamond
|
||||
)]
|
||||
|
||||
result = []
|
||||
for line in re.compile(r'(<.*?>)', re.U).split(text):
|
||||
if not re.search(r'<.*>', line):
|
||||
for s, r in zip(glyph_search, glyph_replace):
|
||||
rules = []
|
||||
if re.search(r'{.+?}', line):
|
||||
rules = self.macro_defaults + self.glyph_defaults
|
||||
else:
|
||||
rules = self.glyph_defaults
|
||||
for s, r in rules:
|
||||
line = s.sub(r, line)
|
||||
result.append(line)
|
||||
return ''.join(result)
|
||||
@ -927,7 +742,7 @@ class Textile(object):
|
||||
return url
|
||||
|
||||
def shelve(self, text):
|
||||
id = str(uuid.uuid4())
|
||||
id = str(uuid.uuid4()) + 'c'
|
||||
self.shelf[id] = text
|
||||
return id
|
||||
|
||||
@ -1049,7 +864,7 @@ class Textile(object):
|
||||
|
||||
for qtag in qtags:
|
||||
pattern = re.compile(r"""
|
||||
(?:^|(?<=[\s>%(pnct)s])|\[|([\]}]))
|
||||
(?:^|(?<=[\s>%(pnct)s\(])|\[|([\]}]))
|
||||
(%(qtag)s)(?!%(qtag)s)
|
||||
(%(c)s)
|
||||
(?::(\S+))?
|
||||
|
@ -165,6 +165,7 @@ class TXTInput(InputFormatPlugin):
|
||||
elif options.formatting_type == 'textile':
|
||||
log.debug('Running text through textile conversion...')
|
||||
html = convert_textile(txt)
|
||||
setattr(options, 'smarten_punctuation', True)
|
||||
else:
|
||||
log.debug('Running text through basic conversion...')
|
||||
flow_size = getattr(options, 'flow_size', 0)
|
||||
|
@ -25,8 +25,11 @@ class PreferencesAction(InterfaceAction):
|
||||
self.gui.run_wizard)
|
||||
if not DEBUG:
|
||||
pm.addSeparator()
|
||||
pm.addAction(QIcon(I('debug.png')), _('Restart in debug mode'),
|
||||
ac = pm.addAction(QIcon(I('debug.png')), _('Restart in debug mode'),
|
||||
self.debug_restart)
|
||||
ac.setShortcut('Ctrl+Shift+R')
|
||||
self.gui.addAction(ac)
|
||||
|
||||
self.qaction.setMenu(pm)
|
||||
self.preferences_menu = pm
|
||||
for x in (self.gui.preferences_action, self.qaction):
|
||||
|
@ -21,7 +21,7 @@ class StructureDetectionWidget(Widget, Ui_Form):
|
||||
def __init__(self, parent, get_option, get_help, db=None, book_id=None):
|
||||
Widget.__init__(self, parent,
|
||||
['chapter', 'chapter_mark',
|
||||
'remove_first_image',
|
||||
'remove_first_image', 'remove_fake_margins',
|
||||
'insert_metadata', 'page_breaks_before']
|
||||
)
|
||||
self.db, self.book_id = db, book_id
|
||||
|
@ -48,10 +48,10 @@
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="6" column="0" colspan="3">
|
||||
<item row="7" column="0" colspan="3">
|
||||
<widget class="XPathEdit" name="opt_page_breaks_before" native="true"/>
|
||||
</item>
|
||||
<item row="7" column="0" colspan="3">
|
||||
<item row="8" column="0" colspan="3">
|
||||
<spacer name="verticalSpacer">
|
||||
<property name="orientation">
|
||||
<enum>Qt::Vertical</enum>
|
||||
@ -77,7 +77,7 @@
|
||||
</property>
|
||||
</spacer>
|
||||
</item>
|
||||
<item row="4" column="0" colspan="3">
|
||||
<item row="5" column="0" colspan="3">
|
||||
<widget class="QLabel" name="label_2">
|
||||
<property name="text">
|
||||
<string>The header and footer removal options have been replaced by the Search & Replace options. Click the Search & Replace category in the bar to the left to use these options. Leave the replace field blank and enter your header/footer removal regexps into the search field.</string>
|
||||
@ -87,6 +87,13 @@
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="2" column="2">
|
||||
<widget class="QCheckBox" name="opt_remove_fake_margins">
|
||||
<property name="text">
|
||||
<string>Remove &fake margins</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
</widget>
|
||||
<customwidgets>
|
||||
|
@ -16,8 +16,7 @@ from PyQt4.Qt import Qt, QTreeView, QApplication, pyqtSignal, QFont, QSize, \
|
||||
QIcon, QPoint, QVBoxLayout, QHBoxLayout, QComboBox, QTimer,\
|
||||
QAbstractItemModel, QVariant, QModelIndex, QMenu, QFrame,\
|
||||
QPushButton, QWidget, QItemDelegate, QString, QLabel, \
|
||||
QShortcut, QKeySequence, SIGNAL, QMimeData, QSizePolicy,\
|
||||
QToolButton
|
||||
QShortcut, QKeySequence, SIGNAL, QMimeData, QToolButton
|
||||
|
||||
from calibre.ebooks.metadata import title_sort
|
||||
from calibre.gui2 import config, NONE, gprefs
|
||||
@ -1051,12 +1050,12 @@ class TagsModel(QAbstractItemModel): # {{{
|
||||
if (key == 'authors' and len(ids) >= 5):
|
||||
if not confirm('<p>'+_('Changing the authors for several books can '
|
||||
'take a while. Are you sure?')
|
||||
+'</p>', 'tag_browser_drop_authors', self.parent()):
|
||||
+'</p>', 'tag_browser_drop_authors', self.tags_view):
|
||||
return
|
||||
elif len(ids) > 15:
|
||||
if not confirm('<p>'+_('Changing the metadata for that many books '
|
||||
'can take a while. Are you sure?')
|
||||
+'</p>', 'tag_browser_many_changes', self.parent()):
|
||||
+'</p>', 'tag_browser_many_changes', self.tags_view):
|
||||
return
|
||||
|
||||
fm = self.db.metadata_for_field(key)
|
||||
|
@ -12,18 +12,17 @@ __docformat__ = 'restructuredtext en'
|
||||
import collections, os, sys, textwrap, time, gc
|
||||
from Queue import Queue, Empty
|
||||
from threading import Thread
|
||||
from PyQt4.Qt import Qt, SIGNAL, QTimer, QHelpEvent, QAction, \
|
||||
QMenu, QIcon, pyqtSignal, \
|
||||
QDialog, QSystemTrayIcon, QApplication, QKeySequence
|
||||
from PyQt4.Qt import (Qt, SIGNAL, QTimer, QHelpEvent, QAction,
|
||||
QMenu, QIcon, pyqtSignal, QUrl,
|
||||
QDialog, QSystemTrayIcon, QApplication, QKeySequence)
|
||||
|
||||
from calibre import prints
|
||||
from calibre.constants import __appname__, isosx
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
from calibre.utils.config import prefs, dynamic
|
||||
from calibre.utils.ipc.server import Server
|
||||
from calibre.library.database2 import LibraryDatabase2
|
||||
from calibre.customize.ui import interface_actions
|
||||
from calibre.gui2 import error_dialog, GetMetadata, open_local_file, \
|
||||
from calibre.gui2 import error_dialog, GetMetadata, open_url, \
|
||||
gprefs, max_available_height, config, info_dialog, Dispatcher, \
|
||||
question_dialog
|
||||
from calibre.gui2.cover_flow import CoverFlowMixin
|
||||
@ -567,37 +566,7 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, EmailMixin, # {{{
|
||||
QApplication.instance().quit()
|
||||
|
||||
def donate(self, *args):
|
||||
BUTTON = '''
|
||||
<form action="https://www.paypal.com/cgi-bin/webscr" method="post">
|
||||
<input type="hidden" name="cmd" value="_s-xclick" />
|
||||
<input type="hidden" name="hosted_button_id" value="3029467" />
|
||||
<input type="image" src="https://www.paypal.com/en_US/i/btn/btn_donateCC_LG.gif" border="0" name="submit" alt="Donate to support calibre development" />
|
||||
<img alt="" border="0" src="https://www.paypal.com/en_US/i/scr/pixel.gif" width="1" height="1" />
|
||||
</form>
|
||||
'''
|
||||
MSG = _('is the result of the efforts of many volunteers from all '
|
||||
'over the world. If you find it useful, please consider '
|
||||
'donating to support its development. Your donation helps '
|
||||
'keep calibre development going.')
|
||||
HTML = u'''
|
||||
<html>
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
|
||||
<title>Donate to support calibre</title>
|
||||
</head>
|
||||
<body style="background:white">
|
||||
<div><a href="http://calibre-ebook.com"><img style="border:0px"
|
||||
src="file://%s" alt="calibre" /></a></div>
|
||||
<p>Calibre %s</p>
|
||||
%s
|
||||
</body>
|
||||
</html>
|
||||
'''%(P('content_server/calibre_banner.png').replace(os.sep, '/'), MSG, BUTTON)
|
||||
pt = PersistentTemporaryFile('_donate.htm')
|
||||
pt.write(HTML.encode('utf-8'))
|
||||
pt.close()
|
||||
open_local_file(pt.name)
|
||||
|
||||
open_url(QUrl('http://calibre-ebook.com/donate'))
|
||||
|
||||
def confirm_quit(self):
|
||||
if self.job_manager.has_jobs():
|
||||
|
@ -317,7 +317,7 @@ class CoverView(QGraphicsView, ImageDropMixin):
|
||||
ImageDropMixin.__init__(self)
|
||||
|
||||
def get_pixmap(self):
|
||||
for item in self.scene().items():
|
||||
for item in self.scene.items():
|
||||
if hasattr(item, 'pixmap'):
|
||||
return item.pixmap()
|
||||
|
||||
@ -342,6 +342,7 @@ class FontFamilyModel(QAbstractListModel):
|
||||
self.families = list(qt_families.intersection(set(self.families)))
|
||||
self.families.sort()
|
||||
self.families[:0] = [_('None')]
|
||||
self.font = QFont('sansserif')
|
||||
|
||||
def rowCount(self, *args):
|
||||
return len(self.families)
|
||||
@ -354,10 +355,11 @@ class FontFamilyModel(QAbstractListModel):
|
||||
return NONE
|
||||
if role == Qt.DisplayRole:
|
||||
return QVariant(family)
|
||||
if False and role == Qt.FontRole:
|
||||
# Causes a Qt crash with some fonts
|
||||
# so disabled.
|
||||
return QVariant(QFont(family))
|
||||
if role == Qt.FontRole:
|
||||
# If a user chooses some non standard font as the interface font,
|
||||
# rendering some font names causes Qt to crash, so return what is
|
||||
# hopefully a "safe" font
|
||||
return QVariant(self.font)
|
||||
return NONE
|
||||
|
||||
def index_of(self, family):
|
||||
|
@ -549,6 +549,8 @@ Calibre has several keyboard shortcuts to save you time and mouse movement. Thes
|
||||
- Download metadata and shortcuts
|
||||
* - :kbd:`Ctrl+R`
|
||||
- Restart calibre
|
||||
* - :kbd:`Ctrl+Shift+R`
|
||||
- Restart calibre in debug mode
|
||||
* - :kbd:`Shift+Ctrl+E`
|
||||
- Add empty books to calibre
|
||||
* - :kbd:`Ctrl+Q`
|
||||
|
@ -1,4 +1,4 @@
|
||||
#!/usr/bin/env python
|
||||
#!/usr/bin/env python2
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
||||
@ -8,114 +8,71 @@ __docformat__ = 'restructuredtext en'
|
||||
Plugin to make the commit command automatically close bugs when the commit
|
||||
message contains `Fix #number` or `Implement #number`. Also updates the commit
|
||||
message with the summary of the closed bug. It also sets the `--fixes` metadata
|
||||
appropriately. Currently only works with a Trac bug repository with the XMLRPC
|
||||
plugin enabled.
|
||||
|
||||
To use copy this file into `~/.bazaar/plugins` and add the following to branch.conf
|
||||
in the working tree you want to use it with::
|
||||
|
||||
trac_reponame_url = <url>
|
||||
trac_reponame_username = <username>
|
||||
trac_reponame_password = <password>
|
||||
appropriately.
|
||||
|
||||
'''
|
||||
import os, re, xmlrpclib, subprocess
|
||||
from bzrlib.builtins import cmd_commit as _cmd_commit, tree_files
|
||||
from bzrlib import branch
|
||||
import re, urllib, importlib, sys
|
||||
from bzrlib.builtins import cmd_commit as _cmd_commit
|
||||
import bzrlib
|
||||
|
||||
from lxml import html
|
||||
|
||||
SENDMAIL = ('/home/kovid/work/kde', 'pgp_mail')
|
||||
|
||||
class cmd_commit(_cmd_commit):
|
||||
|
||||
@classmethod
|
||||
def trac_url(self, username, password, url):
|
||||
return url.replace('//', '//%s:%s@'%(username, password))+'/login/xmlrpc'
|
||||
|
||||
def get_trac_summary(self, bug, url):
|
||||
print 'Getting bug summary for bug #%s'%bug,
|
||||
server = xmlrpclib.ServerProxy(url)
|
||||
attributes = server.ticket.get(int(bug))[-1]
|
||||
print attributes['summary']
|
||||
return attributes['summary']
|
||||
|
||||
def expand_bug(self, msg, nick, config, bug_tracker, type='trac'):
|
||||
prefix = '%s_%s_'%(type, nick)
|
||||
username = config.get_user_option(prefix+'username')
|
||||
password = config.get_user_option(prefix+'password')
|
||||
close_bug = config.get_user_option(prefix+'pattern')
|
||||
if close_bug is None:
|
||||
def expand_bug(self, msg):
|
||||
close_bug = r'(Fix|Implement|Fixes|Fixed|Implemented)\s+#(\d+)'
|
||||
close_bug_pat = re.compile(close_bug, re.IGNORECASE)
|
||||
match = close_bug_pat.search(msg)
|
||||
if not match:
|
||||
return msg, None, None, None
|
||||
return msg, None, None
|
||||
action, bug = match.group(1), match.group(2)
|
||||
summary = ''
|
||||
if type == 'trac':
|
||||
url = self.trac_url(username, password, bug_tracker)
|
||||
summary = self.get_trac_summary(bug, url)
|
||||
raw = urllib.urlopen('https://bugs.launchpad.net/calibre/+bug/' +
|
||||
bug).read()
|
||||
h1 = html.fromstring(raw).xpath('//h1[@id="edit-title"]')[0]
|
||||
summary = html.tostring(h1, method='text', encoding=unicode).strip()
|
||||
print 'Working on bug:', summary
|
||||
if summary:
|
||||
msg = msg.replace('#%s'%bug, '#%s (%s)'%(bug, summary))
|
||||
msg = msg.replace('Fixesed', 'Fixed')
|
||||
return msg, bug, url, action
|
||||
|
||||
|
||||
def get_bugtracker(self, basedir, type='trac'):
|
||||
config = os.path.join(basedir, '.bzr', 'branch', 'branch.conf')
|
||||
bugtracker, nick = None, None
|
||||
if os.access(config, os.R_OK):
|
||||
for line in open(config).readlines():
|
||||
match = re.search(r'%s_(\S+)_url\s*=\s*(\S+)'%type, line)
|
||||
if match:
|
||||
nick, bugtracker = match.group(1), match.group(2)
|
||||
break
|
||||
return nick, bugtracker
|
||||
|
||||
def expand_message(self, msg, tree):
|
||||
nick, bugtracker = self.get_bugtracker(tree.basedir, type='trac')
|
||||
if not bugtracker:
|
||||
return msg
|
||||
config = branch.Branch.open(tree.basedir).get_config()
|
||||
msg, bug, url, action = self.expand_bug(msg, nick, config, bugtracker)
|
||||
|
||||
return msg, bug, url, action, nick, config
|
||||
return msg, bug, action
|
||||
|
||||
def run(self, message=None, file=None, verbose=False, selected_list=None,
|
||||
unchanged=False, strict=False, local=False, fixes=None,
|
||||
author=None, show_diff=False, exclude=None):
|
||||
nick = config = bug = action = None
|
||||
bug = action = None
|
||||
if message:
|
||||
try:
|
||||
message, bug, url, action, nick, config = \
|
||||
self.expand_message(message, tree_files(selected_list)[0])
|
||||
except ValueError:
|
||||
pass
|
||||
message, bug, action = self.expand_bug(message)
|
||||
|
||||
if nick and bug and not fixes:
|
||||
fixes = [nick+':'+bug]
|
||||
if bug and not fixes:
|
||||
fixes = ['lp:'+bug]
|
||||
|
||||
ret = _cmd_commit.run(self, message=message, file=file, verbose=verbose,
|
||||
selected_list=selected_list, unchanged=unchanged,
|
||||
strict=strict, local=local, fixes=fixes,
|
||||
author=author, show_diff=show_diff, exclude=exclude)
|
||||
if message and bug and action and nick and config:
|
||||
self.close_bug(bug, action, url, config)
|
||||
if message and bug and action:
|
||||
self.close_bug(bug, action)
|
||||
return ret
|
||||
|
||||
def close_bug(self, bug, action, url, config):
|
||||
def close_bug(self, bug, action):
|
||||
print 'Closing bug #%s'% bug
|
||||
#nick = config.get_nickname()
|
||||
suffix = config.get_user_option('bug_close_comment')
|
||||
if suffix is None:
|
||||
suffix = 'The fix will be in the next release.'
|
||||
suffix = ('The fix will be in the next release. '
|
||||
'calibre is usually released every Friday.')
|
||||
action = action+'ed'
|
||||
msg = '%s in branch %s. %s'%(action, 'lp:calibre', suffix)
|
||||
msg = msg.replace('Fixesed', 'Fixed')
|
||||
server = xmlrpclib.ServerProxy(url)
|
||||
server.ticket.update(int(bug), msg,
|
||||
{'status':'closed', 'resolution':'fixed'},
|
||||
True)
|
||||
subprocess.Popen('/home/kovid/work/kde/mail.py -f --delay 10'.split())
|
||||
msg += '\n\n status fixreleased'
|
||||
|
||||
sys.path.insert(0, SENDMAIL[0])
|
||||
|
||||
sendmail = importlib.import_module(SENDMAIL[1])
|
||||
|
||||
to = bug+'@bugs.launchpad.net'
|
||||
sendmail.sendmail(msg, to, 'Fixed in lp:calibre')
|
||||
|
||||
|
||||
bzrlib.commands.register_command(cmd_commit)
|
||||
|
@ -8,15 +8,18 @@ import re, htmlentitydefs
|
||||
_ascii_pat = None
|
||||
|
||||
def clean_ascii_chars(txt, charlist=None):
|
||||
'''
|
||||
Remove ASCII control chars: 0 to 8 and 11, 12, 14-31 by default
|
||||
This is all control chars except \\t,\\n and \\r
|
||||
r'''
|
||||
Remove ASCII control chars.
|
||||
This is all control chars except \t, \n and \r
|
||||
'''
|
||||
if not txt:
|
||||
return ''
|
||||
global _ascii_pat
|
||||
if _ascii_pat is None:
|
||||
chars = list(range(8)) + [0x0B, 0x0C] + list(range(0x0E, 0x1F))
|
||||
chars = set(xrange(32))
|
||||
chars.add(127)
|
||||
for x in (9, 10, 13):
|
||||
chars.remove(x)
|
||||
_ascii_pat = re.compile(u'|'.join(map(unichr, chars)))
|
||||
|
||||
if charlist is None:
|
||||
|
@ -584,6 +584,12 @@ def educateQuotes(str):
|
||||
# <p>He said, "'Quoted' words in a larger quote."</p>
|
||||
str = re.sub(r""""'(?=\w)""", """“‘""", str)
|
||||
str = re.sub(r"""'"(?=\w)""", """‘“""", str)
|
||||
str = re.sub(r'''""(?=\w)''', """““""", str)
|
||||
str = re.sub(r"""''(?=\w)""", """‘‘""", str)
|
||||
str = re.sub(r'''\"\'''', """”’""", str)
|
||||
str = re.sub(r'''\'\"''', """’”""", str)
|
||||
str = re.sub(r'''""''', """””""", str)
|
||||
str = re.sub(r"""''""", """’’""", str)
|
||||
|
||||
# Special case for decade abbreviations (the '80s):
|
||||
str = re.sub(r"""\b'(?=\d{2}s)""", r"""’""", str)
|
||||
|
@ -251,12 +251,12 @@ class WMF(object):
|
||||
img.load(bmp)
|
||||
return img.export('png')
|
||||
|
||||
def wmf_unwrap(wmf_data):
|
||||
def wmf_unwrap(wmf_data, verbose=0):
|
||||
'''
|
||||
Return the largest embedded raster image in the WMF.
|
||||
The returned data is in PNG format.
|
||||
'''
|
||||
w = WMF()
|
||||
w = WMF(verbose=verbose)
|
||||
w(wmf_data)
|
||||
if not w.has_raster_image:
|
||||
raise ValueError('No raster image found in the WMF')
|
||||
@ -266,4 +266,5 @@ if __name__ == '__main__':
|
||||
wmf = WMF(verbose=4)
|
||||
wmf(open(sys.argv[-1], 'rb'))
|
||||
open('/t/test.bmp', 'wb').write(wmf.bitmaps[0])
|
||||
open('/t/test.png', 'wb').write(wmf.to_png())
|
||||
|
||||
|
@ -28,6 +28,7 @@ class Article(object):
|
||||
pass
|
||||
if not isinstance(self._title, unicode):
|
||||
self._title = self._title.decode('utf-8', 'replace')
|
||||
self._title = clean_ascii_chars(self._title)
|
||||
self.url = url
|
||||
self.author = author
|
||||
if author and not isinstance(author, unicode):
|
||||
@ -75,7 +76,7 @@ class Article(object):
|
||||
t = t.decode('utf-8', 'replace')
|
||||
return t
|
||||
def fset(self, val):
|
||||
self._title = val
|
||||
self._title = clean_ascii_chars(val)
|
||||
return property(fget=fget, fset=fset)
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user