Sync to trunk.

This commit is contained in:
John Schember 2011-03-21 18:29:41 -04:00
commit 2d46e35a6c
29 changed files with 482 additions and 211 deletions

View File

@ -8,13 +8,13 @@ __description__ = 'Providing context and clarity on national and international n
'''csmonitor.com'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class ChristianScienceMonitor(BasicNewsRecipe):
author = 'Kovid Goyal, Sujata Raman and Lorenzo Vigentini'
__author__ = 'Kovid Goyal'
description = 'Providing context and clarity on national and international news, peoples and cultures'
cover_url = 'http://www.csmonitor.com/extension/csm_base/design/csm_design/images/csmlogo_179x46.gif'
@ -34,6 +34,49 @@ class ChristianScienceMonitor(BasicNewsRecipe):
remove_javascript = True
no_stylesheets = True
def append_page(self, soup, appendtag, position):
    """Pull the text of follow-on pages of a multi-page article into place.

    Looks for a ``div`` with class ``navigation`` in ``soup``. For every
    anchor in it that passes the ``'Next' in part`` membership test, the
    linked page is downloaded with ``self.index_to_soup``, its
    ``list-article-*`` div is located, any further pages are appended to
    that div recursively, and the div is finally inserted into
    ``appendtag`` at index ``position``.

    :param soup: parsed page that may contain a pager.
    :param appendtag: tag that receives the text of the following page.
    :param position: child index in ``appendtag`` at which to insert.
    :return: None; ``appendtag`` (and the downloaded soups) are modified
        in place.
    """
    nav = soup.find('div', attrs={'class': 'navigation'})
    if not nav:
        return
    for part in nav.findAll('a'):
        if 'Next' not in part:
            continue
        hrefs = re.findall(r'href="(.*?)"', str(part))
        if not hrefs:
            # A "Next" anchor without an href cannot be followed.
            continue
        nexturl = 'http://www.csmonitor.com' + hrefs[0]
        soup2 = self.index_to_soup(nexturl)
        texttag = soup2.find('div',
                attrs={'class': re.compile('list-article-.*')})
        if texttag is None:
            # The next page has no article container; nothing to append.
            continue
        trash_c = soup2.findAll(attrs={'class': 'list-description'})
        trash_h = soup2.h1
        for tc in trash_c:
            tc.extract()
        if trash_h is not None:
            # Drop the repeated headline of the continuation page.
            trash_h.extract()
        # Further pages go after everything already present in texttag.
        newpos = len(texttag.contents)
        self.append_page(soup2, texttag, newpos)
        texttag.extract()
        appendtag.insert(position, texttag)
def preprocess_html(self, soup):
    """Return the print version of an article, or a stitched multi-page one.

    If the serialized page contains a link matching the print-view URL
    pattern, that page is downloaded with ``self.index_to_soup`` and
    returned. Otherwise the following pages are appended with
    ``self.append_page`` and navigation, inline-styled and byline elements
    are removed from ``soup``, which is then returned.

    :param soup: parsed article page.
    :return: the soup that should be used for the article.
    """
    PRINT_RE = re.compile(r'/layout/set/print/content/view/print/[0-9]*')
    html = str(soup)
    try:
        print_found = PRINT_RE.findall(html)
    except Exception:
        # Best effort: without a usable match list fall back to the
        # multi-page path instead of hitting an unbound name below.
        print_found = []
    if print_found:
        print_url = 'http://www.csmonitor.com' + print_found[0]
        print_soup = self.index_to_soup(print_url)
    else:
        self.append_page(soup, soup.body, 3)
        # Collect everything first, then detach, so the three searches all
        # run against the same tree.
        trash_a = soup.findAll(attrs={'class': re.compile('navigation.*')})
        trash_b = soup.findAll(attrs={'style': re.compile('.*')})
        trash_d = soup.findAll(attrs={'class': 'sByline'})
        for ta in trash_a:
            ta.extract()
        for tb in trash_b:
            tb.extract()
        for td in trash_d:
            td.extract()
        print_soup = soup
    return print_soup
preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
[
@ -43,7 +86,6 @@ class ChristianScienceMonitor(BasicNewsRecipe):
(r'Full HTML version of this story which may include photos, graphics, and related links.*</body>',
lambda match : '</body>'),
]]
extra_css = '''
h1{ color:#000000;font-family: Georgia,Times,"Times New Roman",serif; font-size: large}
.sub{ color:#000000;font-family: Georgia,Times,"Times New Roman",serif; font-size: small;}
@ -56,10 +98,9 @@ class ChristianScienceMonitor(BasicNewsRecipe):
#main{font-family:Arial,Tahoma,Verdana,Helvetica,sans-serif ; font-size: small; }
#photo-details{ font-family:Arial,Helvetica,sans-serif ; color:#999999; font-size: x-small;}
span.name{color:#205B87;font-family: Georgia,Times,"Times New Roman",serif; font-size: x-small}
p#dateline{color:#444444 ; font-family:Arial,Helvetica,sans-serif ; font-style:italic;}
'''
feeds = [
(u'Top Stories' , u'http://rss.csmonitor.com/feeds/top'),
p#dateline{color:#444444 ; font-family:Arial,Helvetica,sans-serif ; font-style:italic;} '''
feeds = [(u'Top Stories', u'http://rss.csmonitor.com/feeds/top'),
(u'World' , u'http://rss.csmonitor.com/feeds/world'),
(u'USA' , u'http://rss.csmonitor.com/feeds/usa'),
(u'Commentary' , u'http://rss.csmonitor.com/feeds/commentary'),
@ -74,9 +115,7 @@ class ChristianScienceMonitor(BasicNewsRecipe):
(u'Home Forum' , u'http://rss.csmonitor.com/feeds/homeforum')
]
keep_only_tags = [
dict(name='div', attrs={'id':'mainColumn'}),
]
keep_only_tags = [dict(name='div', attrs={'id':'mainColumn'}), ]
remove_tags = [
dict(name='div', attrs={'id':['story-tools','videoPlayer','storyRelatedBottom','enlarge-photo','photo-paginate']}),
@ -86,7 +125,10 @@ class ChristianScienceMonitor(BasicNewsRecipe):
'hide', 'podBrdr']}),
dict(name='ul', attrs={'class':[ 'centerliststories']}) ,
dict(name='form', attrs={'id':[ 'commentform']}) ,
dict(name='div', attrs={'class': ['ui-comments']})
]
remove_tags_after = [ dict(name='div', attrs={'class':[ 'ad csmAd']})]
remove_tags_after = [ dict(name='div', attrs={'class':[ 'ad csmAd']}),
dict(name='div', attrs={'class': [re.compile('navigation.*')]}),
dict(name='div', attrs={'style': [re.compile('.*')]})
]

View File

@ -1,4 +1,3 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
@ -6,55 +5,48 @@ class WashingtonPost(BasicNewsRecipe):
title = 'Washington Post'
description = 'US political news'
__author__ = 'Kovid Goyal and Sujata Raman'
__author__ = 'Kovid Goyal'
use_embedded_content = False
max_articles_per_feed = 20
language = 'en'
encoding = 'utf-8'
remove_javascript = True
no_stylesheets = True
extra_css = '''
#articleCopyright { font-family:Arial,helvetica,sans-serif ; font-weight:bold ; font-size:x-small ;}
p { font-family:"Times New Roman",times,serif ; font-weight:normal ; font-size:small ;}
body{font-family:arial,helvetica,sans-serif}
'''
feeds = [ ('Today\'s Highlights', 'http://www.washingtonpost.com/wp-dyn/rss/linkset/2005/03/24/LI2005032400102.xml'),
('Politics', 'http://www.washingtonpost.com/wp-dyn/rss/politics/index.xml'),
('Nation', 'http://www.washingtonpost.com/wp-dyn/rss/nation/index.xml'),
('World', 'http://www.washingtonpost.com/wp-dyn/rss/world/index.xml'),
('Business', 'http://www.washingtonpost.com/wp-dyn/rss/business/index.xml'),
('Technology', 'http://www.washingtonpost.com/wp-dyn/rss/technology/index.xml'),
('Health', 'http://www.washingtonpost.com/wp-dyn/rss/health/index.xml'),
('Education', 'http://www.washingtonpost.com/wp-dyn/rss/education/index.xml'),
('Style',
'http://www.washingtonpost.com/wp-dyn/rss/print/style/index.xml'),
('NFL Sports',
'http://www.washingtonpost.com/wp-dyn/rss/sports/index/nfl/index.xml'),
('Redskins', 'http://www.washingtonpost.com/wp-dyn/rss/sports/redskins/index.xml'),
('Editorials', 'http://www.washingtonpost.com/wp-dyn/rss/linkset/2005/05/30/LI2005053000331.xml'),
feeds = [
('Politics', 'http://www.washingtonpost.com/rss/politics'),
('Nation', 'http://www.washingtonpost.com/rss/national'),
('World', 'http://www.washingtonpost.com/rss/world'),
('Business', 'http://www.washingtonpost.com/rss/business'),
('Lifestyle', 'http://www.washingtonpost.com/rss/lifestyle'),
('Sports', 'http://www.washingtonpost.com/rss/sports'),
('Redskins', 'http://www.washingtonpost.com/rss/sports/redskins'),
('Opinions', 'http://www.washingtonpost.com/rss/opinions'),
('Entertainment', 'http://www.washingtonpost.com/rss/entertainment'),
('Local', 'http://www.washingtonpost.com/rss/local'),
('Investigations',
'http://www.washingtonpost.com/rss/investigations'),
]
remove_tags = [{'id':['pfmnav', 'ArticleCommentsWrapper']}]
remove_tags = [
{'class':lambda x: x and 'article-toolbar' in x},
{'class':lambda x: x and 'quick-comments' in x},
{'class':lambda x: x and 'tweet' in x},
{'class':lambda x: x and 'article-related' in x},
{'class':lambda x: x and 'hidden' in x.split()},
{'class':lambda x: x and 'also-read' in x.split()},
{'class':lambda x: x and 'partners-content' in x.split()},
{'class':['module share', 'module ads', 'comment-vars', 'hidden',
'share-icons-wrap', 'comments']},
{'id':['right-rail']},
]
keep_only_tags = dict(id=['content', 'article'])
def get_article_url(self, article):
return article.get('guid', article.get('link', None))
def print_version(self, url):
return url.rpartition('.')[0] + '_pf.html'
url = url.rpartition('?')[0]
return url.replace('_story.html', '_singlePage.html')
def postprocess_html(self, soup, first):
for div in soup.findAll(name='div', style=re.compile('margin')):
div['style'] = ''
return soup
def preprocess_html(self, soup):
for tag in soup.findAll('font'):
if tag.has_key('size'):
if tag['size'] == '+2':
if tag.b:
return soup
return None

View File

@ -18,6 +18,6 @@ def recipe_title_callback(raw):
return eval(raw.decode('utf-8'))
vipy.session.add_content_browser('.r', ',r', 'Recipe',
vipy.session.glob_based_iterator(os.path.join(project_dir, 'resources', 'recipes', '*.recipe')),
vipy.session.glob_based_iterator(os.path.join(project_dir, 'recipes', '*.recipe')),
vipy.session.regexp_based_matcher(r'title\s*=\s*(?P<title>.+)', 'title', recipe_title_callback))
EOFPY

View File

@ -14,9 +14,9 @@ from setup.build_environment import HOST, PROJECT
BASE_RSYNC = ['rsync', '-avz', '--delete']
EXCLUDES = []
for x in [
'src/calibre/plugins', 'src/calibre/manual', 'src/calibre/trac',
'src/calibre/plugins', 'src/calibre/manual', 'src/calibre/trac', 'recipes',
'.bzr', '.build', '.svn', 'build', 'dist', 'imgsrc', '*.pyc', '*.pyo', '*.swp',
'*.swo']:
'*.swo', 'format_docs']:
EXCLUDES.extend(['--exclude', x])
SAFE_EXCLUDES = ['"%s"'%x if '*' in x else x for x in EXCLUDES]
@ -138,7 +138,7 @@ class VMInstaller(Command):
self.vm = self.VM
if not self.vmware_started():
self.start_vmware()
subprocess.call(['chmod', '-R', '+r', 'resources/recipes'])
subprocess.call(['chmod', '-R', '+r', 'recipes'])
self.start_vm()
self.download_installer()
if not self.dont_shutdown:

View File

@ -6,7 +6,7 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import sys, os, shutil, glob, py_compile, subprocess, re
import sys, os, shutil, glob, py_compile, subprocess, re, zipfile, time
from setup import Command, modules, functions, basenames, __version__, \
__appname__
@ -40,6 +40,13 @@ DESCRIPTIONS = {
'calibre-smtp' : 'Command line interface for sending books via email',
}
def walk(dir):
    ''' Yield the full path of every file found anywhere below dir '''
    for dirpath, _subdirs, filenames in os.walk(dir):
        for filename in filenames:
            yield os.path.join(dirpath, filename)
class Win32Freeze(Command, WixMixIn):
description = 'Free windows calibre installation'
@ -63,12 +70,15 @@ class Win32Freeze(Command, WixMixIn):
self.rc_template = self.j(self.d(self.a(__file__)), 'template.rc')
self.py_ver = ''.join(map(str, sys.version_info[:2]))
self.lib_dir = self.j(self.base, 'Lib')
self.pydlib = self.j(self.base, 'pydlib')
self.pylib = self.j(self.base, 'pylib.zip')
self.initbase()
self.build_launchers()
self.freeze()
self.embed_manifests()
self.install_site_py()
self.archive_lib_dir()
self.create_installer()
def initbase(self):
@ -356,4 +366,108 @@ class Win32Freeze(Command, WixMixIn):
dest, lib]
self.run_builder(cmd)
def archive_lib_dir(self):
    '''
    Move the contents of ``self.lib_dir`` into ``self.pylib`` (a zip file)
    and ``self.pydlib`` (a plain directory), then delete ``self.lib_dir``.

    Entries are added to the zip via ``self.add_to_zipfile``; trees for
    which ``self.is_zip_safe`` returns False, and ``.pyd`` files, are
    copied into ``self.pydlib`` instead.
    '''
    self.info('Putting all python code into a zip file for performance')
    # Always start from an empty pydlib directory
    if os.path.exists(self.pydlib):
        shutil.rmtree(self.pydlib)
    os.makedirs(self.pydlib)
    # (year, month, day, hour, minute, second), used as the date of every
    # zip entry created by add_to_zipfile
    self.zf_timestamp = time.localtime(time.time())[:6]
    # Archive names already written, used by add_to_zipfile to detect
    # duplicates
    self.zf_names = set()
    # ZIP_STORED: entries are stored without compression
    with zipfile.ZipFile(self.pylib, 'w', zipfile.ZIP_STORED) as zf:
        # Everything in Lib except site-packages goes straight into the zip
        for x in os.listdir(self.lib_dir):
            if x == 'site-packages':
                continue
            self.add_to_zipfile(zf, x, self.lib_dir)
        sp = self.j(self.lib_dir, 'site-packages')
        # Names in site-packages that need no further processing in the
        # final loop below
        handled = set(['site.pyo'])
        # These .pth files and the directories they list are copied
        # verbatim into pydlib
        for pth in ('PIL.pth', 'pywin32.pth'):
            handled.add(pth)
            shutil.copyfile(self.j(sp, pth), self.j(self.pydlib, pth))
            for d in self.get_pth_dirs(self.j(sp, pth)):
                # third positional argument of copytree is symlinks
                shutil.copytree(d, self.j(self.pydlib, self.b(d)), True)
                handled.add(self.b(d))
        handled.add('easy-install.pth')
        # The contents of each directory listed in easy-install.pth are
        # merged into the zip or, when not zip safe, into pydlib
        for d in self.get_pth_dirs(self.j(sp, 'easy-install.pth')):
            handled.add(self.b(d))
            zip_safe = self.is_zip_safe(d)
            for x in os.listdir(d):
                if x == 'EGG-INFO':
                    continue
                if zip_safe:
                    self.add_to_zipfile(zf, x, d)
                else:
                    absp = self.j(d, x)
                    dest = self.j(self.pydlib, x)
                    if os.path.isdir(absp):
                        shutil.copytree(absp, dest, True)
                    else:
                        shutil.copy2(absp, dest)
        # Whatever is left in site-packages
        for x in os.listdir(sp):
            if x in handled or x.endswith('.egg-info'):
                continue
            absp = self.j(sp, x)
            if os.path.isdir(absp):
                # Empty directories are dropped
                if not os.listdir(absp):
                    continue
                if self.is_zip_safe(absp):
                    self.add_to_zipfile(zf, x, sp)
                else:
                    shutil.copytree(absp, self.j(self.pydlib, x), True)
            else:
                # add_to_zipfile refuses .pyd files, so they go to pydlib
                if x.endswith('.pyd'):
                    shutil.copy2(absp, self.j(self.pydlib, x))
                else:
                    self.add_to_zipfile(zf, x, sp)
    # Everything needed has been archived or copied by now
    shutil.rmtree(self.lib_dir)
def is_zip_safe(self, path):
    ''' Return False if any file below path is a .pyd, .dll or .exe
    (compared case-insensitively), True otherwise '''
    binary_exts = ('.pyd', '.dll', '.exe')
    return not any(os.path.splitext(f)[1].lower() in binary_exts
                   for f in walk(path))
def get_pth_dirs(self, pth):
    '''
    Yield the existing paths listed in the .pth file ``pth``.

    Every line is stripped; blank lines, lines starting with ``#`` or
    ``import`` and the pywin32 ``win32\\lib`` entry are ignored. The
    remaining lines are joined onto the directory containing ``pth`` and
    yielded only if the resulting path exists.

    :param pth: path to a .pth file.
    :return: generator of paths.
    '''
    base = os.path.dirname(pth)
    # Read everything up front so the file is closed before the first
    # yield, even if the caller abandons the generator.
    with open(pth) as f:
        lines = f.readlines()
    for line in lines:
        line = line.strip()
        if not line or line.startswith(('#', 'import')):
            continue
        if line == 'win32\\lib':
            continue
        candidate = self.j(base, line)
        if os.path.exists(candidate):
            yield candidate
def add_to_zipfile(self, zf, name, base, exclude=frozenset()):
    '''
    Add the file or directory tree ``name`` (relative to ``base``) to the
    open zip file ``zf``.

    Directories get an entry of their own and are then added recursively;
    empty directories are skipped entirely. For files, only ``.py``,
    ``.pyc`` and ``.pyo`` contents are written, other files are merely
    recorded in ``self.zf_names``. Every entry is dated
    ``self.zf_timestamp``.

    :param zf: ``zipfile.ZipFile`` opened for writing.
    :param name: path relative to ``base``; also used as the archive name,
        with ``os.sep`` replaced by ``/``.
    :param base: directory that ``name`` is relative to.
    :param exclude: names of direct children of ``name`` to leave out. It
        is not passed on to the recursive calls, so it only filters the
        first level.
    :raises ValueError: if ``name`` was already added, or if a ``.pyd``,
        ``.dll`` or ``.exe`` file is encountered.
    '''
    abspath = self.j(base, name)
    name = name.replace(os.sep, '/')
    if name in self.zf_names:
        raise ValueError('Already added %r to zipfile [%r]'%(name, abspath))
    zinfo = zipfile.ZipInfo(filename=name, date_time=self.zf_timestamp)
    if os.path.isdir(abspath):
        entries = os.listdir(abspath)
        if not entries:
            return
        # 0o700 in the upper 16 bits of external_attr (conventionally the
        # Unix mode bits of the entry)
        zinfo.external_attr = 0o700 << 16
        zf.writestr(zinfo, '')
        for x in entries:
            if x not in exclude:
                self.add_to_zipfile(zf, name + os.sep + x, base)
    else:
        ext = os.path.splitext(name)[1].lower()
        if ext in ('.pyd', '.dll', '.exe'):
            raise ValueError('Cannot add %r to zipfile'%abspath)
        zinfo.external_attr = 0o600 << 16
        if ext in ('.py', '.pyc', '.pyo'):
            with open(abspath, 'rb') as f:
                zf.writestr(zinfo, f.read())
    self.zf_names.add(name)

View File

@ -96,7 +96,7 @@ def main():
abs__file__()
addsitedir(os.path.join(sys.app_dir, 'Lib', 'site-packages'))
addsitedir(os.path.join(sys.app_dir, 'pydlib'))
add_calibre_vars()

View File

@ -198,7 +198,7 @@ void initialize_interpreter(wchar_t *outr, wchar_t *errr,
buf[strlen(buf)-1] = '\0';
_snprintf_s(python_home, MAX_PATH, _TRUNCATE, "%s", buf);
_snprintf_s(path, 3*MAX_PATH, _TRUNCATE, "%s\\DLLs;%s\\Lib;%s\\Lib\\site-packages",
_snprintf_s(path, 3*MAX_PATH, _TRUNCATE, "%s\\pylib.zip;%s\\pydlib;%s\\DLLs",
buf, buf, buf);
free(buf);

View File

@ -154,9 +154,9 @@
<CustomAction Id="LaunchApplication" BinaryKey="WixCA"
DllEntry="WixShellExec" Impersonate="yes"/>
<InstallUISequence>
<!--<InstallUISequence>
<FileCost Suppress="yes" />
</InstallUISequence>
</InstallUISequence>-->
</Product>
</Wix>

View File

@ -58,12 +58,14 @@ class ANDROID(USBMS):
0x413c : { 0xb007 : [0x0100, 0x0224]},
# LG
0x1004 : { 0x61cc : [0x100], 0x61ce : [0x100] },
0x1004 : { 0x61cc : [0x100], 0x61ce : [0x100], 0x618e : [0x226] },
# Archos
0x0e79 : {
0x1400 : [0x0222, 0x0216],
0x1408 : [0x0222, 0x0216],
0x1411 : [0x216],
0x1417 : [0x0216],
0x1419 : [0x0216],
0x1420 : [0x0216],
0x1422 : [0x0216]
@ -91,14 +93,14 @@ class ANDROID(USBMS):
VENDOR_NAME = ['HTC', 'MOTOROLA', 'GOOGLE_', 'ANDROID', 'ACER',
'GT-I5700', 'SAMSUNG', 'DELL', 'LINUX', 'GOOGLE', 'ARCHOS',
'TELECHIP', 'HUAWEI', 'T-MOBILE', 'SEMC']
'TELECHIP', 'HUAWEI', 'T-MOBILE', 'SEMC', 'LGE']
WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE',
'__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897',
'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID',
'SCH-I500_CARD', 'SPH-D700_CARD', 'MB810', 'GT-P1000', 'DESIRE',
'SGH-T849', '_MB300', 'A70S', 'S_ANDROID', 'A101IT', 'A70H',
'IDEOS_TABLET', 'MYTOUCH_4G', 'UMS_COMPOSITE', 'SCH-I800_CARD',
'7', 'A956', 'A955']
'7', 'A956', 'A955', 'A43', 'ANDROID_PLATFORM']
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
'A70S', 'A101IT', '7']

View File

@ -19,7 +19,7 @@ class BLACKBERRY(USBMS):
VENDOR_ID = [0x0fca]
PRODUCT_ID = [0x8004, 0x0004]
BCD = [0x0200, 0x0107, 0x0210, 0x0201, 0x0211]
BCD = [0x0200, 0x0107, 0x0210, 0x0201, 0x0211, 0x0220]
VENDOR_NAME = 'RIM'
WINDOWS_MAIN_MEM = 'BLACKBERRY_SD'

View File

@ -49,6 +49,8 @@ HEURISTIC_OPTIONS = ['markup_chapter_headings',
'dehyphenate', 'renumber_headings',
'replace_scene_breaks']
DEFAULT_TRUE_OPTIONS = HEURISTIC_OPTIONS + ['remove_fake_margins']
def print_help(parser, log):
help = parser.format_help().encode(preferred_encoding, 'replace')
log(help)
@ -90,7 +92,7 @@ def option_recommendation_to_cli_option(add_option, rec):
if opt.long_switch == 'verbose':
attrs['action'] = 'count'
attrs.pop('type', '')
if opt.name in HEURISTIC_OPTIONS and rec.recommended_value is True:
if opt.name in DEFAULT_TRUE_OPTIONS and rec.recommended_value is True:
switches = ['--disable-'+opt.long_switch]
add_option(Option(*switches, **attrs))
@ -162,6 +164,7 @@ def add_pipeline_options(parser, plumber):
'chapter', 'chapter_mark',
'prefer_metadata_cover', 'remove_first_image',
'insert_metadata', 'page_breaks_before',
'remove_fake_margins',
]
),

View File

@ -304,6 +304,17 @@ OptionRecommendation(name='page_breaks_before',
'before the specified elements.')
),
OptionRecommendation(name='remove_fake_margins',
recommended_value=True, level=OptionRecommendation.LOW,
help=_('Some documents specify page margins by '
'specifying a left and right margin on each individual '
'paragraph. calibre will try to detect and remove these '
'margins. Sometimes, this can cause the removal of '
'margins that should not have been removed. In this '
'case you can disable the removal.')
),
OptionRecommendation(name='margin_top',
recommended_value=5.0, level=OptionRecommendation.LOW,
help=_('Set the top margin in pts. Default is %default. '
@ -988,9 +999,13 @@ OptionRecommendation(name='sr3_replace',
page_break_on_body=self.output_plugin.file_type in ('mobi',
'lit'))
flattener(self.oeb, self.opts)
self.opts.insert_blank_line = oibl
self.opts.remove_paragraph_spacing = orps
from calibre.ebooks.oeb.transforms.page_margin import RemoveFakeMargins
RemoveFakeMargins()(self.oeb, self.log, self.opts)
pr(0.9)
self.flush()

View File

@ -21,6 +21,7 @@ from calibre.ebooks.metadata.book.base import Metadata
from calibre.utils.date import parse_date, isoformat
from calibre.utils.localization import get_lang
from calibre import prints
from calibre.utils.cleantext import clean_ascii_chars
class Resource(object): # {{{
'''
@ -1157,7 +1158,7 @@ class OPFCreator(Metadata):
def DC_ELEM(tag, text, dc_attrs={}, opf_attrs={}):
if text:
elem = getattr(DC, tag)(text, **dc_attrs)
elem = getattr(DC, tag)(clean_ascii_chars(text), **dc_attrs)
else:
elem = getattr(DC, tag)(**dc_attrs)
for k, v in opf_attrs.items():
@ -1260,7 +1261,6 @@ def metadata_to_opf(mi, as_string=True):
from lxml import etree
import textwrap
from calibre.ebooks.oeb.base import OPF, DC
from calibre.utils.cleantext import clean_ascii_chars
if not mi.application_id:
mi.application_id = str(uuid.uuid4())

View File

@ -0,0 +1,153 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from collections import Counter
from calibre.ebooks.oeb.base import OEB_STYLES, barename, XPath
class RemoveFakeMargins(object):
    '''
    Remove left and right margins from paragraphs/divs if the same margin is
    specified on almost all the elements at that level.

    Must be called only after CSS flattening
    '''

    def __call__(self, oeb, log, opts):
        '''
        Run the transform on ``oeb``. Does nothing if
        ``opts.remove_fake_margins`` is false or the manifest contains no
        stylesheet item.
        '''
        if not opts.remove_fake_margins:
            return
        self.oeb, self.log, self.opts = oeb, log, opts
        stylesheet = None
        # level key -> list of elements, filled by find_levels
        self.levels = {}
        # '<level>_left' / '<level>_right' -> Counter of margin values
        self.stats = {}
        # selector text -> style declaration of that rule
        self.selector_map = {}
        # Only the first stylesheet in the manifest is examined
        for item in self.oeb.manifest:
            if item.media_type.lower() in OEB_STYLES:
                stylesheet = item
                break
        if stylesheet is None:
            return

        self.log('Removing fake margins...')

        stylesheet = stylesheet.data

        from cssutils.css import CSSRule
        for rule in stylesheet.cssRules.rulesOfType(CSSRule.STYLE_RULE):
            self.selector_map[rule.selectorList.selectorText] = rule.style

        self.find_levels()

        for level in self.levels:
            self.process_level(level)

    def get_margins(self, elem):
        '''
        Return ``(margin_left, margin_right, style)`` for ``elem``, looked
        up in ``self.selector_map`` under ``'.' + class attribute``.
        Returns ``('', '', None)`` if the element has no class or no rule
        is found for it.
        '''
        cls = elem.get('class', None)
        if cls:
            style = self.selector_map.get('.'+cls, None)
            if style:
                return style.marginLeft, style.marginRight, style
        return '', '', None

    def process_level(self, level):
        '''
        Collect margin statistics for the elements of ``level`` and remove
        the dominant left and/or right margin from the matching styles when
        analyze_stats approves.
        '''
        elems = self.levels[level]
        self.stats[level+'_left'] = Counter()
        self.stats[level+'_right'] = Counter()

        for elem in elems:
            lm, rm = self.get_margins(elem)[:2]
            self.stats[level+'_left'][lm] += 1
            self.stats[level+'_right'][rm] += 1

        self.log.debug(level, ' left margin stats:', self.stats[level+'_left'])
        self.log.debug(level, ' right margin stats:', self.stats[level+'_right'])

        remove_left = self.analyze_stats(self.stats[level+'_left'])
        remove_right = self.analyze_stats(self.stats[level+'_right'])

        # Only meaningful when the corresponding remove_* flag is set
        mcl = mcr = None
        if remove_left:
            mcl = self.stats[level+'_left'].most_common(1)[0][0]
            self.log('Removing level %s left margin of:'%level, mcl)

        if remove_right:
            mcr = self.stats[level+'_right'].most_common(1)[0][0]
            self.log('Removing level %s right margin of:'%level, mcr)

        if remove_left or remove_right:
            for elem in elems:
                lm, rm, style = self.get_margins(elem)
                # analyze_stats never approves an empty margin, so a match
                # here implies get_margins returned a real style
                if remove_left and lm == mcl:
                    style.removeProperty('margin-left')
                if remove_right and rm == mcr:
                    style.removeProperty('margin-right')

    def find_levels(self):
        '''
        Fill ``self.levels`` with the p and div elements of every spine
        item, keyed by ``'<tag>_<depth below body>'``, then discard levels
        and elements that have too few members to analyze.
        '''

        def level_of(elem, body):
            # Number of steps from elem up to body; a direct child is 1
            ans = 1
            while elem.getparent() is not body:
                ans += 1
                elem = elem.getparent()
            return ans

        paras = XPath('descendant::h:p|descendant::h:div')

        for item in self.oeb.spine:
            body = XPath('//h:body')(item.data)
            if not body:
                continue
            body = body[0]

            for p in paras(body):
                level = level_of(p, body)
                level = '%s_%d'%(barename(p.tag), level)
                self.levels.setdefault(level, []).append(p)

        remove = set()
        for k, v in self.levels.items():
            num = len(v)
            self.log.debug('Found %d items of level:'%num, k)
            level = int(k.split('_')[-1])
            tag = k.split('_')[0]
            if tag == 'p' and num < 25:
                remove.add(k)
            if tag == 'div':
                if level > 2 and num < 25:
                    remove.add(k)
                elif level < 3:
                    # Check each level < 3 element and only keep those
                    # that have many child paras
                    for elem in list(v):
                        children = len(paras(elem))
                        if children < 5:
                            v.remove(elem)

        for k in remove:
            self.levels.pop(k)
            self.log.debug('Ignoring level', k)

    def analyze_stats(self, stats):
        '''
        Return True if the most common value in the Counter ``stats`` is a
        non-empty, non-``'0'`` margin that accounts for more than 95% of
        all counted elements.
        '''
        if not stats:
            return False
        most_common, most_common_count = stats.most_common(1)[0]
        if not most_common or most_common == '0':
            return False
        total = sum(stats.values())
        # True if greater than 95% of elements have the same margin
        return most_common_count/total > 0.95

View File

@ -22,6 +22,7 @@ border_style_map = {
'dot-dot-dash': 'dotted',
'outset': 'outset',
'tripple': 'double',
'triple': 'double',
'thick-thin-small': 'solid',
'thin-thick-small': 'solid',
'thin-thick-thin-small': 'solid',

View File

@ -25,8 +25,11 @@ class PreferencesAction(InterfaceAction):
self.gui.run_wizard)
if not DEBUG:
pm.addSeparator()
pm.addAction(QIcon(I('debug.png')), _('Restart in debug mode'),
ac = pm.addAction(QIcon(I('debug.png')), _('Restart in debug mode'),
self.debug_restart)
ac.setShortcut('Ctrl+Shift+R')
self.gui.addAction(ac)
self.qaction.setMenu(pm)
self.preferences_menu = pm
for x in (self.gui.preferences_action, self.qaction):

View File

@ -21,7 +21,7 @@ class StructureDetectionWidget(Widget, Ui_Form):
def __init__(self, parent, get_option, get_help, db=None, book_id=None):
Widget.__init__(self, parent,
['chapter', 'chapter_mark',
'remove_first_image',
'remove_first_image', 'remove_fake_margins',
'insert_metadata', 'page_breaks_before']
)
self.db, self.book_id = db, book_id

View File

@ -48,10 +48,10 @@
</property>
</widget>
</item>
<item row="6" column="0" colspan="3">
<item row="7" column="0" colspan="3">
<widget class="XPathEdit" name="opt_page_breaks_before" native="true"/>
</item>
<item row="7" column="0" colspan="3">
<item row="8" column="0" colspan="3">
<spacer name="verticalSpacer">
<property name="orientation">
<enum>Qt::Vertical</enum>
@ -77,7 +77,7 @@
</property>
</spacer>
</item>
<item row="4" column="0" colspan="3">
<item row="5" column="0" colspan="3">
<widget class="QLabel" name="label_2">
<property name="text">
<string>The header and footer removal options have been replaced by the Search &amp; Replace options. Click the Search &amp; Replace category in the bar to the left to use these options. Leave the replace field blank and enter your header/footer removal regexps into the search field.</string>
@ -87,6 +87,13 @@
</property>
</widget>
</item>
<item row="2" column="2">
<widget class="QCheckBox" name="opt_remove_fake_margins">
<property name="text">
<string>Remove &amp;fake margins</string>
</property>
</widget>
</item>
</layout>
</widget>
<customwidgets>

View File

@ -428,7 +428,7 @@ class BooksModel(QAbstractTableModel): # {{{
au = self.db.authors(row)
if not au:
au = _('Unknown')
au = ', '.join([a.strip() for a in au.split(',')])
au = authors_to_string([a.strip().replace('|', ',') for a in au.split(',')])
data[_('Author(s)')] = au
return data

View File

@ -16,7 +16,7 @@ from PyQt4.Qt import Qt, QTreeView, QApplication, pyqtSignal, QFont, QSize, \
QIcon, QPoint, QVBoxLayout, QHBoxLayout, QComboBox, QTimer,\
QAbstractItemModel, QVariant, QModelIndex, QMenu, QFrame,\
QPushButton, QWidget, QItemDelegate, QString, QLabel, \
QShortcut, QKeySequence, SIGNAL, QMimeData
QShortcut, QKeySequence, SIGNAL, QMimeData, QToolButton
from calibre.ebooks.metadata import title_sort
from calibre.gui2 import config, NONE, gprefs
@ -707,8 +707,8 @@ class TagTreeItem(object): # {{{
break
elif self.tag.state == TAG_SEARCH_STATES['mark_plusplus'] or\
self.tag.state == TAG_SEARCH_STATES['mark_minusminus']:
if self.tag.is_searchable and self.tag.is_hierarchical \
and len(self.children):
if self.tag.is_searchable and len(self.children) and \
self.tag.is_hierarchical == '5state':
break
else:
break
@ -803,7 +803,8 @@ class TagsModel(QAbstractItemModel): # {{{
self.category_nodes.append(node)
node.can_be_edited = (not is_gst) and (i == (len(path_parts)-1))
node.is_gst = is_gst
node.tag.is_hierarchical = not is_gst
if not is_gst:
node.tag.is_hierarchical = '5state'
if not is_gst:
tree_root[p] = {}
tree_root = tree_root[p]
@ -1050,12 +1051,12 @@ class TagsModel(QAbstractItemModel): # {{{
if (key == 'authors' and len(ids) >= 5):
if not confirm('<p>'+_('Changing the authors for several books can '
'take a while. Are you sure?')
+'</p>', 'tag_browser_drop_authors', self.parent()):
+'</p>', 'tag_browser_drop_authors', self.tags_view):
return
elif len(ids) > 15:
if not confirm('<p>'+_('Changing the metadata for that many books '
'can take a while. Are you sure?')
+'</p>', 'tag_browser_many_changes', self.parent()):
+'</p>', 'tag_browser_many_changes', self.tags_view):
return
fm = self.db.metadata_for_field(key)
@ -1294,7 +1295,8 @@ class TagsModel(QAbstractItemModel): # {{{
if t.type != TagTreeItem.CATEGORY])
if (comp,tag.category) in child_map:
node_parent = child_map[(comp,tag.category)]
node_parent.tag.is_hierarchical = key != 'search'
node_parent.tag.is_hierarchical = \
'5state' if tag.category != 'search' else '3state'
else:
if i < len(components)-1:
t = copy.copy(tag)
@ -1309,7 +1311,8 @@ class TagsModel(QAbstractItemModel): # {{{
t = tag
if not in_uc:
t.original_name = t.name
t.is_hierarchical = key != 'search'
t.is_hierarchical = \
'5state' if t.category != 'search' else '3state'
t.name = comp
self.beginInsertRows(category_index, 999999, 1)
node_parent = TagTreeItem(parent=node_parent, data=t,
@ -2058,17 +2061,18 @@ class TagBrowserWidget(QWidget): # {{{
sc = QShortcut(QKeySequence(_('ALT+f')), parent)
sc.connect(sc, SIGNAL('activated()'), self.set_focus_to_find_box)
self.search_button = QPushButton()
self.search_button = QToolButton()
self.search_button.setText(_('F&ind'))
self.search_button.setToolTip(_('Find the first/next matching item'))
self.search_button.setFixedWidth(40)
search_layout.addWidget(self.search_button)
self.expand_button = QPushButton()
self.expand_button = QToolButton()
self.expand_button.setText('-')
self.expand_button.setFixedWidth(20)
self.expand_button.setToolTip(_('Collapse all categories'))
search_layout.addWidget(self.expand_button)
search_layout.setStretch(0, 10)
search_layout.setStretch(1, 1)
search_layout.setStretch(2, 1)
self.current_find_position = None
self.search_button.clicked.connect(self.find)

View File

@ -12,18 +12,17 @@ __docformat__ = 'restructuredtext en'
import collections, os, sys, textwrap, time, gc
from Queue import Queue, Empty
from threading import Thread
from PyQt4.Qt import Qt, SIGNAL, QTimer, QHelpEvent, QAction, \
QMenu, QIcon, pyqtSignal, \
QDialog, QSystemTrayIcon, QApplication, QKeySequence
from PyQt4.Qt import (Qt, SIGNAL, QTimer, QHelpEvent, QAction,
QMenu, QIcon, pyqtSignal, QUrl,
QDialog, QSystemTrayIcon, QApplication, QKeySequence)
from calibre import prints
from calibre.constants import __appname__, isosx
from calibre.ptempfile import PersistentTemporaryFile
from calibre.utils.config import prefs, dynamic
from calibre.utils.ipc.server import Server
from calibre.library.database2 import LibraryDatabase2
from calibre.customize.ui import interface_actions
from calibre.gui2 import error_dialog, GetMetadata, open_local_file, \
from calibre.gui2 import error_dialog, GetMetadata, open_url, \
gprefs, max_available_height, config, info_dialog, Dispatcher, \
question_dialog
from calibre.gui2.cover_flow import CoverFlowMixin
@ -567,37 +566,7 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, EmailMixin, # {{{
QApplication.instance().quit()
def donate(self, *args):
BUTTON = '''
<form action="https://www.paypal.com/cgi-bin/webscr" method="post">
<input type="hidden" name="cmd" value="_s-xclick" />
<input type="hidden" name="hosted_button_id" value="3029467" />
<input type="image" src="https://www.paypal.com/en_US/i/btn/btn_donateCC_LG.gif" border="0" name="submit" alt="Donate to support calibre development" />
<img alt="" border="0" src="https://www.paypal.com/en_US/i/scr/pixel.gif" width="1" height="1" />
</form>
'''
MSG = _('is the result of the efforts of many volunteers from all '
'over the world. If you find it useful, please consider '
'donating to support its development. Your donation helps '
'keep calibre development going.')
HTML = u'''
<html>
<head>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
<title>Donate to support calibre</title>
</head>
<body style="background:white">
<div><a href="http://calibre-ebook.com"><img style="border:0px"
src="file://%s" alt="calibre" /></a></div>
<p>Calibre %s</p>
%s
</body>
</html>
'''%(P('content_server/calibre_banner.png').replace(os.sep, '/'), MSG, BUTTON)
pt = PersistentTemporaryFile('_donate.htm')
pt.write(HTML.encode('utf-8'))
pt.close()
open_local_file(pt.name)
open_url(QUrl('http://calibre-ebook.com/donate'))
def confirm_quit(self):
if self.job_manager.has_jobs():

View File

@ -317,7 +317,7 @@ class CoverView(QGraphicsView, ImageDropMixin):
ImageDropMixin.__init__(self)
def get_pixmap(self):
for item in self.scene().items():
for item in self.scene.items():
if hasattr(item, 'pixmap'):
return item.pixmap()
@ -342,6 +342,7 @@ class FontFamilyModel(QAbstractListModel):
self.families = list(qt_families.intersection(set(self.families)))
self.families.sort()
self.families[:0] = [_('None')]
self.font = QFont('sansserif')
def rowCount(self, *args):
return len(self.families)
@ -354,10 +355,11 @@ class FontFamilyModel(QAbstractListModel):
return NONE
if role == Qt.DisplayRole:
return QVariant(family)
if False and role == Qt.FontRole:
# Causes a Qt crash with some fonts
# so disabled.
return QVariant(QFont(family))
if role == Qt.FontRole:
# If a user chooses some non standard font as the interface font,
# rendering some font names causes Qt to crash, so return what is
# hopefully a "safe" font
return QVariant(self.font)
return NONE
def index_of(self, family):

View File

@ -53,7 +53,7 @@ class Tag(object):
self.id = id
self.count = count
self.state = state
self.is_hierarchical = False
self.is_hierarchical = ''
self.is_editable = is_editable
self.is_searchable = is_searchable
self.id_set = id_set if id_set is not None else set([])

View File

@ -549,6 +549,8 @@ Calibre has several keyboard shortcuts to save you time and mouse movement. Thes
- Download metadata and shortcuts
* - :kbd:`Ctrl+R`
- Restart calibre
* - :kbd:`Ctrl+Shift+R`
- Restart calibre in debug mode
* - :kbd:`Shift+Ctrl+E`
- Add empty books to calibre
* - :kbd:`Ctrl+Q`

View File

@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python2
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
@ -8,114 +8,71 @@ __docformat__ = 'restructuredtext en'
Plugin to make the commit command automatically close bugs when the commit
message contains `Fix #number` or `Implement #number`. Also updates the commit
message with the summary of the closed bug. It also sets the `--fixes` metadata
appropriately. Currently only works with a Trac bug repository with the XMLRPC
plugin enabled.
To use copy this file into `~/.bazaar/plugins` and add the following to branch.conf
in the working tree you want to use it with::
trac_reponame_url = <url>
trac_reponame_username = <username>
trac_reponame_password = <password>
appropriately.
'''
import os, re, xmlrpclib, subprocess
from bzrlib.builtins import cmd_commit as _cmd_commit, tree_files
from bzrlib import branch
import re, urllib, importlib, sys
from bzrlib.builtins import cmd_commit as _cmd_commit
import bzrlib
from lxml import html
SENDMAIL = ('/home/kovid/work/kde', 'pgp_mail')
class cmd_commit(_cmd_commit):
    '''
    Replacement for the stock bzr ``commit`` command.

    When the commit message contains `Fix #number`, `Implement #number` or
    one of their variants, the bug title is fetched from Launchpad and added
    to the message, ``--fixes lp:number`` is set if no ``--fixes`` was given
    and, once the commit has been made, a mail is sent to Launchpad to mark
    the bug as fix released.
    '''

    @staticmethod
    def _past_tense(action):
        '''
        Map any accepted spelling of the action verb (Fix, Fixes, Fixed,
        Implement, Implemented, in any case) to "Fixed" or "Implemented".
        '''
        if action.lower().startswith('fix'):
            return 'Fixed'
        return 'Implemented'

    def expand_bug(self, msg):
        '''
        Find a bug reference in the commit message and annotate it with the
        bug title shown on the bug's Launchpad page.

        :param msg: The commit message.
        :return: ``(msg, bug, action)``. ``bug`` is the bug number as a
                 string and ``action`` is the verb exactly as typed. Both
                 are None (and ``msg`` is unchanged) when the message
                 contains no bug reference.
        :raises ValueError: If the Launchpad page has no bug title element,
                            which typically means the bug does not exist.
        '''
        close_bug = r'(Fix|Implement|Fixes|Fixed|Implemented)\s+#(\d+)'
        close_bug_pat = re.compile(close_bug, re.IGNORECASE)
        match = close_bug_pat.search(msg)
        if not match:
            return msg, None, None
        action, bug = match.group(1), match.group(2)
        raw = urllib.urlopen('https://bugs.launchpad.net/calibre/+bug/' +
                bug).read()
        titles = html.fromstring(raw).xpath('//h1[@id="edit-title"]')
        if not titles:
            # Fail with a clear message instead of a bare IndexError
            raise ValueError(
                'Could not find the title of bug #%s on Launchpad' % bug)
        summary = html.tostring(titles[0], method='text',
                encoding=unicode).strip()
        print('Working on bug: %s' % summary)
        if summary:
            # The lookahead stops "#12" from also matching inside "#123".
            # A callable replacement keeps backslashes in the title literal.
            annotated = '#%s (%s)' % (bug, summary)
            msg = re.sub(r'#%s(?!\d)' % bug, lambda m: annotated, msg)
        return msg, bug, action

    def run(self, message=None, file=None, verbose=False, selected_list=None,
            unchanged=False, strict=False, local=False, fixes=None,
            author=None, show_diff=False, exclude=None):
        '''
        Run the normal commit, expanding any bug reference in ``message``
        first and closing the referenced bug afterwards. All parameters are
        passed through to the stock ``commit`` command; ``fixes`` is only
        filled in when the caller did not supply it.
        '''
        bug = action = None
        if message:
            message, bug, action = self.expand_bug(message)

        if bug and not fixes:
            fixes = ['lp:'+bug]

        ret = _cmd_commit.run(self, message=message, file=file, verbose=verbose,
                              selected_list=selected_list, unchanged=unchanged,
                              strict=strict, local=local, fixes=fixes,
                              author=author, show_diff=show_diff, exclude=exclude)
        # Only reached when the commit itself did not raise
        if message and bug and action:
            self.close_bug(bug, action)
        return ret

    def close_bug(self, bug, action):
        '''
        Mail Launchpad to mark ``bug`` as fix released.

        :param bug: The bug number as a string.
        :param action: The verb matched in the commit message.
        '''
        print('Closing bug #%s' % bug)
        # Note the space that separates the two sentences
        suffix = ('The fix will be in the next release. '
                'calibre is usually released every Friday.')
        msg = '%s in branch %s. %s'%(self._past_tense(action), 'lp:calibre',
                suffix)
        # The status line is kept exactly as before, leading space included
        msg += '\n\n status fixreleased'

        # Make the mail helper importable without growing sys.path on
        # every call
        if SENDMAIL[0] not in sys.path:
            sys.path.insert(0, SENDMAIL[0])

        sendmail = importlib.import_module(SENDMAIL[1])

        to = bug+'@bugs.launchpad.net'
        sendmail.sendmail(msg, to, 'Re: calibre bug '+bug)
bzrlib.commands.register_command(cmd_commit)

View File

@ -8,15 +8,18 @@ import re, htmlentitydefs
_ascii_pat = None
def clean_ascii_chars(txt, charlist=None):
'''
Remove ASCII control chars: 0 to 8 and 11, 12, 14-31 by default
This is all control chars except \\t,\\n and \\r
r'''
Remove ASCII control chars.
This is all control chars except \t, \n and \r
'''
if not txt:
return ''
global _ascii_pat
if _ascii_pat is None:
chars = list(range(8)) + [0x0B, 0x0C] + list(range(0x0E, 0x1F))
chars = set(xrange(32))
chars.add(127)
for x in (9, 10, 13):
chars.remove(x)
_ascii_pat = re.compile(u'|'.join(map(unichr, chars)))
if charlist is None:

View File

@ -251,12 +251,12 @@ class WMF(object):
img.load(bmp)
return img.export('png')
def wmf_unwrap(wmf_data):
def wmf_unwrap(wmf_data, verbose=0):
'''
Return the largest embedded raster image in the WMF.
The returned data is in PNG format.
'''
w = WMF()
w = WMF(verbose=verbose)
w(wmf_data)
if not w.has_raster_image:
raise ValueError('No raster image found in the WMF')
@ -266,4 +266,5 @@ if __name__ == '__main__':
wmf = WMF(verbose=4)
wmf(open(sys.argv[-1], 'rb'))
open('/t/test.bmp', 'wb').write(wmf.bitmaps[0])
open('/t/test.png', 'wb').write(wmf.to_png())

View File

@ -28,6 +28,7 @@ class Article(object):
pass
if not isinstance(self._title, unicode):
self._title = self._title.decode('utf-8', 'replace')
self._title = clean_ascii_chars(self._title)
self.url = url
self.author = author
if author and not isinstance(author, unicode):
@ -75,7 +76,7 @@ class Article(object):
t = t.decode('utf-8', 'replace')
return t
def fset(self, val):
self._title = val
self._title = clean_ascii_chars(val)
return property(fget=fget, fset=fset)

View File

@ -140,8 +140,8 @@ class RecipeModel(QAbstractItemModel, SearchQueryParser):
self.scheduler_config = SchedulerConfig()
try:
with zipfile.ZipFile(P('builtin_recipes.zip'), 'r') as zf:
self.favicons = dict([(x, zf.getinfo(x)) for x in zf.namelist() if
x.endswith('.png')])
self.favicons = dict([(x.filename, x) for x in zf.infolist() if
x.filename.endswith('.png')])
except:
self.favicons = {}
self.do_refresh()