diff --git a/epydoc-pdf.conf b/epydoc-pdf.conf deleted file mode 100644 index 03ed6d104d..0000000000 --- a/epydoc-pdf.conf +++ /dev/null @@ -1,50 +0,0 @@ -[epydoc] # Epydoc section marker (required by ConfigParser) - -# Information about the project. -name: calibre -url: http://calibre.kovidgoyal.net - -# The list of modules to document. Modules can be named using -# dotted names, module filenames, or package directory names. -# This option may be repeated. -modules: calibre.devices, calibre.ebooks.lrf.web.profiles - -output: pdf -target: docs/pdf - -frames: no - -# graph -# The list of graph types that should be automatically included -# in the output. Graphs are generated using the Graphviz "dot" -# executable. Graph types include: "classtree", "callgraph", -# "umlclass". Use "all" to include all graph types -graph: classtree - -# css -# The CSS stylesheet for HTML output. Can be the name of a builtin -# stylesheet, or the name of a file. -css: white - -# link -# HTML code for the project link in the navigation bar. If left -# unspecified, the project link will be generated based on the -# project's name and URL. -#link: My Cool Project - -# top -# The "top" page for the documentation. Can be a URL, the name -# of a module or class, or one of the special names "trees.html", -# "indices.html", or "help.html" -# top: calibre - -# verbosity -# An integer indicating how verbose epydoc should be. The default -# value is 0; negative values will supress warnings and errors; -# positive values will give more verbose output. -#verbosity: 0 - -# separate-classes -# Whether each class should be listed in its own section when -# generating LaTeX or PDF output. -#separate-classes: no diff --git a/epydoc.conf b/epydoc.conf deleted file mode 100644 index 3259623054..0000000000 --- a/epydoc.conf +++ /dev/null @@ -1,51 +0,0 @@ -[epydoc] # Epydoc section marker (required by ConfigParser) - -# Information about the project. 
-name: calibre - API documentation -url: http://calibre.kovidgoyal.net - -# The list of modules to document. Modules can be named using -# dotted names, module filenames, or package directory names. -# This option may be repeated. -modules: calibre.devices, calibre.ebooks.lrf.web.profiles - -# Write html output to the directory "docs" -output: html -target: docs/html - -frames: no - -# graph -# The list of graph types that should be automatically included -# in the output. Graphs are generated using the Graphviz "dot" -# executable. Graph types include: "classtree", "callgraph", -# "umlclass". Use "all" to include all graph types -graph: classtree - -# css -# The CSS stylesheet for HTML output. Can be the name of a builtin -# stylesheet, or the name of a file. -css: white - -# link -# HTML code for the project link in the navigation bar. If left -# unspecified, the project link will be generated based on the -# project's name and URL. -link: calibre - -# top -# The "top" page for the documentation. Can be a URL, the name -# of a module or class, or one of the special names "trees.html", -# "indices.html", or "help.html" -#top: - -# verbosity -# An integer indicating how verbose epydoc should be. The default -# value is 0; negative values will supress warnings and errors; -# positive values will give more verbose output. -#verbosity: 0 - -# separate-classes -# Whether each class should be listed in its own section when -# generating LaTeX or PDF output. -#separate-classes: no diff --git a/pylint.conf b/pylint.conf deleted file mode 100644 index 3eec91c1ce..0000000000 --- a/pylint.conf +++ /dev/null @@ -1,602 +0,0 @@ -# lint Python modules using external checkers. - -# - -# This is the main checker controling the other ones and the reports - -# generation. 
It is itself both a raw checker and an astng checker in order - -# to: - -# * handle message activation / deactivation at the module level - -# * handle some basic but necessary stats'data (number of classes, methods...) - -# - -[MASTER] - - - -# Specify a configuration file. - -#rcfile= - - - -# Profiled execution. - -profile=no - - - -# Add to the black list. It should be a base name, not a - -# path. You may set this option multiple times. - -ignore=CVS - - - -# Pickle collected data for later comparisons. - -persistent=yes - - - -# Set the cache size for astng objects. - -cache-size=500 - - - -# List of plugins (as comma separated values of python modules names) to load, - -# usually to register additional checkers. - -load-plugins= - - - - - -[MESSAGES CONTROL] - - - -# Enable only checker(s) with the given id(s). This option conflict with the - -# disable-checker option - -#enable-checker= - - - -# Enable all checker(s) except those with the given id(s). This option conflict - -# with the disable-checker option - -#disable-checker= - - - -# Enable all messages in the listed categories. - -#enable-msg-cat= - - - -# Disable all messages in the listed categories. - -#disable-msg-cat= - - - -# Enable the message(s) with the given id(s). - -#enable-msg= - - - -# Disable the message(s) with the given id(s). - -#disable-msg= - - - - - -[REPORTS] - - - -# set the output format. Available formats are text, parseable, colorized and - -# html - -output-format=colorized - - - -# Include message's id in output - -include-ids=no - - - -# Put messages in a separate file for each module / package specified on the - -# command line instead of printing them on stdout. Reports (if any) will be - -# written in a file name "pylint_global.[txt|html]". 
- -files-output=no - - - -# Tells wether to display a full report or only the messages - -reports=yes - - - -# Python expression which should return a note less than 10 (10 is the highest - -# note).You have access to the variables errors warning, statement which - -# respectivly contain the number of errors / warnings messages and the total - -# number of statements analyzed. This is used by the global evaluation report - -# (R0004). - -evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) - - - -# Add a comment according to your evaluation note. This is used by the global - -# evaluation report (R0004). - -comment=no - - - -# Enable the report(s) with the given id(s). - -#enable-report= - - - -# Disable the report(s) with the given id(s). - -#disable-report= - - - - - -# checks for - -# * unused variables / imports - -# * undefined variables - -# * redefinition of variable from builtins or from an outer scope - -# * use of variable before assigment - -# - -[VARIABLES] - - - -# Tells wether we should check for unused import in __init__ files. - -init-import=no - - - -# A regular expression matching names used for dummy variables (i.e. not used). - -dummy-variables-rgx=_|dummy - - - -# List of additional names supposed to be defined in builtins. Remember that - -# you should avoid to define new builtins when possible. - -additional-builtins= - - - - - -# try to find bugs in the code using type inference - -# - -[TYPECHECK] - - - -# Tells wether missing members accessed in mixin class should be ignored. A - -# mixin class is detected if its name ends with "mixin" (case insensitive). - -ignore-mixin-members=yes - - - -# When zope mode is activated, consider the acquired-members option to ignore - -# access to some undefined attributes. - -zope=no - - - -# List of members which are usually get through zope's acquisition mecanism and - -# so shouldn't trigger E0201 when accessed (need zope=yes to be considered). 
- -acquired-members=REQUEST,acl_users,aq_parent - - - - - -# checks for : - -# * doc strings - -# * modules / classes / functions / methods / arguments / variables name - -# * number of arguments, local variables, branchs, returns and statements in - -# functions, methods - -# * required module attributes - -# * dangerous default values as arguments - -# * redefinition of function / method / class - -# * uses of the global statement - -# - -[BASIC] - - - -# Required attributes for module, separated by a comma - -required-attributes= - - - -# Regular expression which should only match functions or classes name which do - -# not require a docstring - -no-docstring-rgx=__.*__ - - - -# Regular expression which should only match correct module names - -module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ - - - -# Regular expression which should only match correct module level names - -const-rgx=(([A-Z_][A-Z1-9_]*)|(__.*__))$ - - - -# Regular expression which should only match correct class names - -class-rgx=[A-Z_][a-zA-Z0-9]+$ - - - -# Regular expression which should only match correct function names - -function-rgx=[a-z_][a-z0-9_]{2,30}$ - - - -# Regular expression which should only match correct method names - -method-rgx=[a-z_][a-z0-9_]{2,30}$ - - - -# Regular expression which should only match correct instance attribute names - -attr-rgx=[a-z_][a-z0-9_]{2,30}$ - - - -# Regular expression which should only match correct argument names - -argument-rgx=[a-z_][a-z0-9_]{2,30}$ - - - -# Regular expression which should only match correct variable names - -variable-rgx=[a-z_][a-z0-9_]{2,30}$ - - - -# Regular expression which should only match correct list comprehension / - -# generator expression variable names - -inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$ - - - -# Good variable names which should always be accepted, separated by a comma - -good-names=i,j,k,ex,Run,_ - - - -# Bad variable names which should always be refused, separated by a comma - 
-bad-names=foo,bar,baz,toto,tutu,tata - - - -# List of builtins function names that should not be used, separated by a comma - -bad-functions=map,filter,apply,input - - - - - -# checks for sign of poor/misdesign: - -# * number of methods, attributes, local variables... - -# * size, complexity of functions, methods - -# - -[DESIGN] - - - -# Maximum number of arguments for function / method - -max-args=5 - - - -# Maximum number of locals for function / method body - -max-locals=15 - - - -# Maximum number of return / yield for function / method body - -max-returns=6 - - - -# Maximum number of branch for function / method body - -max-branchs=12 - - - -# Maximum number of statements in function / method body - -max-statements=50 - - - -# Maximum number of parents for a class (see R0901). - -max-parents=7 - - - -# Maximum number of attributes for a class (see R0902). - -max-attributes=7 - - - -# Minimum number of public methods for a class (see R0903). - -min-public-methods=2 - - - -# Maximum number of public methods for a class (see R0904). - -max-public-methods=20 - - - - - -# checks for - -# * external modules dependencies - -# * relative / wildcard imports - -# * cyclic imports - -# * uses of deprecated modules - -# - -[IMPORTS] - - - -# Deprecated modules which should not be used, separated by a comma - -deprecated-modules=regsub,string,TERMIOS,Bastion,rexec - - - -# Create a graph of every (i.e. 
internal and external) dependencies in the - -# given file (report R0402 must not be disabled) - -import-graph= - - - -# Create a graph of external dependencies in the given file (report R0402 must - -# not be disabled) - -ext-import-graph= - - - -# Create a graph of internal dependencies in the given file (report R0402 must - -# not be disabled) - -int-import-graph= - - - - - -# checks for : - -# * methods without self as first argument - -# * overridden methods signature - -# * access only to existant members via self - -# * attributes not defined in the __init__ method - -# * supported interfaces implementation - -# * unreachable code - -# - -[CLASSES] - - - -# List of interface methods to ignore, separated by a comma. This is used for - -# instance to not check methods defines in Zope's Interface base class. - -ignore-iface-methods=isImplementedBy,deferred,extends,names,namesAndDescriptions,queryDescriptionFor,getBases,getDescriptionFor,getDoc,getName,getTaggedValue,getTaggedValueTags,isEqualOrExtendedBy,setTaggedValue,isImplementedByInstancesOf,adaptWith,is_implemented_by - - - -# List of method names used to declare (i.e. assign) instance attributes. - -defining-attr-methods=__init__,__new__,setUp - - - - - -# checks for similarities and duplicated code. This computation may be - -# memory / CPU intensive, so you should disable it if you experiments some - -# problems. - -# - -[SIMILARITIES] - - - -# Minimum lines number of a similarity. - -min-similarity-lines=4 - - - -# Ignore comments when computing similarities. - -ignore-comments=yes - - - -# Ignore docstrings when computing similarities. - -ignore-docstrings=yes - - - - - -# checks for: - -# * warning notes in the code like FIXME, XXX - -# * PEP 263: source code with non ascii character but no encoding declaration - -# - -[MISCELLANEOUS] - - - -# List of note tags to take in consideration, separated by a comma. 
- -notes=FIXME,XXX,TODO - - - - - -# checks for : - -# * unauthorized constructions - -# * strict indentation - -# * line length - -# * use of <> instead of != - -# - -[FORMAT] - - - -# Maximum number of characters on a single line. - -max-line-length=80 - - - -# Maximum number of lines in a module - -max-module-lines=1000 - - - -# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 - -# tab). - -indent-string=' ' - diff --git a/src/calibre/constants.py b/src/calibre/constants.py index ef83336740..d9d5ee0cfc 100644 --- a/src/calibre/constants.py +++ b/src/calibre/constants.py @@ -2,7 +2,7 @@ __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' __docformat__ = 'restructuredtext en' __appname__ = 'calibre' -__version__ = '0.4.140' +__version__ = '0.4.141' __author__ = "Kovid Goyal " ''' Various run time constants. diff --git a/src/calibre/devices/__init__.py b/src/calibre/devices/__init__.py index ed16dba14b..a673d3fe09 100644 --- a/src/calibre/devices/__init__.py +++ b/src/calibre/devices/__init__.py @@ -12,7 +12,8 @@ def devices(): from calibre.devices.cybookg3.driver import CYBOOKG3 from calibre.devices.kindle.driver import KINDLE from calibre.devices.kindle.driver import KINDLE2 - return (PRS500, PRS505, PRS700, CYBOOKG3, KINDLE, KINDLE2) + from calibre.devices.blackberry.driver import BLACKBERRY + return (PRS500, PRS505, PRS700, CYBOOKG3, KINDLE, KINDLE2, BLACKBERRY) import time diff --git a/src/calibre/devices/blackberry/__init__.py b/src/calibre/devices/blackberry/__init__.py new file mode 100644 index 0000000000..c8c1a8f015 --- /dev/null +++ b/src/calibre/devices/blackberry/__init__.py @@ -0,0 +1,6 @@ +from __future__ import with_statement +__license__ = 'GPL 3' +__copyright__ = '2009, Kovid Goyal ' +__docformat__ = 'restructuredtext en' + + diff --git a/src/calibre/devices/blackberry/driver.py b/src/calibre/devices/blackberry/driver.py new file mode 100644 index 0000000000..f6c615b0de --- /dev/null +++ 
b/src/calibre/devices/blackberry/driver.py @@ -0,0 +1,30 @@ +from __future__ import with_statement +__license__ = 'GPL 3' +__copyright__ = '2009, Kovid Goyal ' +__docformat__ = 'restructuredtext en' + + +from calibre.devices.usbms.driver import USBMS + +class BLACKBERRY(USBMS): + # Ordered list of supported formats + FORMATS = ['mobi', 'prc'] + + VENDOR_ID = [0x0fca] + PRODUCT_ID = [0x8004] + BCD = [0x0200] + + VENDOR_NAME = 'RIM' + WINDOWS_MAIN_MEM = 'BLACKBERRY_SD' + #WINDOWS_CARD_MEM = 'CARD_STORAGE' + + #OSX_MAIN_MEM = 'Kindle Internal Storage Media' + #OSX_CARD_MEM = 'Kindle Card Storage Media' + + MAIN_MEMORY_VOLUME_LABEL = 'Blackberry Main Memory' + #STORAGE_CARD_VOLUME_LABEL = 'Kindle Storage Card' + + EBOOK_DIR_MAIN = 'ebooks' + #EBOOK_DIR_CARD = "documents" + SUPPORTS_SUB_DIRS = True + diff --git a/src/calibre/devices/usbms/driver.py b/src/calibre/devices/usbms/driver.py index cadc61e584..0c73c4412c 100644 --- a/src/calibre/devices/usbms/driver.py +++ b/src/calibre/devices/usbms/driver.py @@ -1,3 +1,4 @@ +from __future__ import with_statement __license__ = 'GPL v3' __copyright__ = '2009, John Schember ' ''' @@ -165,8 +166,8 @@ class USBMS(Device): def get_file(self, path, outfile, end_session=True): path = self.munge_path(path) - src = open(path, 'rb') - shutil.copyfileobj(src, outfile, 10*1024*1024) + with open(path, 'rb') as src: + shutil.copyfileobj(src, outfile, 10*1024*1024) def put_file(self, infile, path, replace_file=False, end_session=True): path = self.munge_path(path) diff --git a/src/calibre/ebooks/chardet/__init__.py b/src/calibre/ebooks/chardet/__init__.py index 8ad41c524f..af6d724883 100644 --- a/src/calibre/ebooks/chardet/__init__.py +++ b/src/calibre/ebooks/chardet/__init__.py @@ -30,12 +30,50 @@ def detect(aBuf): # Added by Kovid ENCODING_PATS = [ - re.compile(r'<\?[^<>]+encoding=[\'"](.*?)[\'"][^<>]*>', re.IGNORECASE), - re.compile(r'', re.IGNORECASE) + re.compile(r'<\?[^<>]+encoding=[\'"](.*?)[\'"][^<>]*>', + re.IGNORECASE), + 
re.compile(r'', + re.IGNORECASE) ] ENTITY_PATTERN = re.compile(r'&(\S+?);') -def xml_to_unicode(raw, verbose=False, strip_encoding_pats=False, resolve_entities=False): +def strip_encoding_declarations(raw): + for pat in ENCODING_PATS: + raw = pat.sub('', raw) + return raw + +def substitute_entites(raw): + from calibre import entity_to_unicode + from functools import partial + f = partial(entity_to_unicode, exceptions= + ['amp', 'apos', 'quot', 'lt', 'gt']) + return ENTITY_PATTERN.sub(f, raw) + +_CHARSET_ALIASES = { "macintosh" : "mac-roman", + "x-sjis" : "shift-jis" } + + +def force_encoding(raw, verbose): + from calibre.constants import preferred_encoding + try: + chardet = detect(raw) + except: + chardet = {'encoding':preferred_encoding, 'confidence':0} + encoding = chardet['encoding'] + if chardet['confidence'] < 1 and verbose: + print 'WARNING: Encoding detection confidence %d%%'%(chardet['confidence']*100) + if not encoding: + encoding = preferred_encoding + encoding = encoding.lower() + if _CHARSET_ALIASES.has_key(encoding): + encoding = _CHARSET_ALIASES[encoding] + if encoding == 'ascii': + encoding = 'utf-8' + return encoding + + +def xml_to_unicode(raw, verbose=False, strip_encoding_pats=False, + resolve_entities=False): ''' Force conversion of byte string to unicode. 
Tries to look for XML/HTML encoding declaration first, if not found uses the chardet library and @@ -45,44 +83,27 @@ def xml_to_unicode(raw, verbose=False, strip_encoding_pats=False, resolve_entiti encoding = None if not raw: return u'', encoding - if isinstance(raw, unicode): - return raw, encoding - for pat in ENCODING_PATS: - match = pat.search(raw) - if match: - encoding = match.group(1) - break - if strip_encoding_pats: + if not isinstance(raw, unicode): + if raw.startswith('\xff\xfe'): + raw, encoding = raw.decode('utf-16-le')[1:], 'utf-16-le' + elif raw.startswith('\xfe\xff'): + raw, encoding = raw.decode('utf-16-be')[1:], 'utf-16-be' + if not isinstance(raw, unicode): for pat in ENCODING_PATS: - raw = pat.sub('', raw) - if encoding is None: + match = pat.search(raw) + if match: + encoding = match.group(1) + break + if encoding is None: + encoding = force_encoding(raw, verbose) try: - chardet = detect(raw) - except: - chardet = {'encoding':'utf-8', 'confidence':0} - encoding = chardet['encoding'] - if chardet['confidence'] < 1 and verbose: - print 'WARNING: Encoding detection confidence %d%%'%(chardet['confidence']*100) - CHARSET_ALIASES = { "macintosh" : "mac-roman", - "x-sjis" : "shift-jis" } - if not encoding: - from calibre import preferred_encoding - encoding = preferred_encoding - if encoding: - encoding = encoding.lower() - if CHARSET_ALIASES.has_key(encoding): - encoding = CHARSET_ALIASES[encoding] - if encoding == 'ascii': - encoding = 'utf-8' + raw = raw.decode(encoding, 'replace') + except LookupError: + raw = raw.decode('utf-8', 'replace') - try: - raw = raw.decode(encoding, 'replace') - except LookupError: - raw = raw.decode('utf-8', 'replace') + if strip_encoding_pats: + raw = strip_encoding_declarations(raw) if resolve_entities: - from calibre import entity_to_unicode - from functools import partial - f = partial(entity_to_unicode, exceptions=['amp', 'apos', 'quot', 'lt', 'gt']) - raw = ENTITY_PATTERN.sub(f, raw) - + raw = 
substitute_entites(raw) + return raw, encoding diff --git a/src/calibre/ebooks/html.py b/src/calibre/ebooks/html.py index b4e75d1c15..d89fc3c9a9 100644 --- a/src/calibre/ebooks/html.py +++ b/src/calibre/ebooks/html.py @@ -273,7 +273,12 @@ def traverse(path_to_html_file, max_levels=sys.maxint, verbose=0, encoding=None) hf.links.remove(link) next_level = list(nl) - return flat, list(depth_first(flat[0], flat)) + orec = sys.getrecursionlimit() + sys.setrecursionlimit(500000) + try: + return flat, list(depth_first(flat[0], flat)) + finally: + sys.setrecursionlimit(orec) def opf_traverse(opf_reader, verbose=0, encoding=None): diff --git a/src/calibre/ebooks/lrf/lrs/convert_from.py b/src/calibre/ebooks/lrf/lrs/convert_from.py index 89a0eb5d44..495d9adb50 100644 --- a/src/calibre/ebooks/lrf/lrs/convert_from.py +++ b/src/calibre/ebooks/lrf/lrs/convert_from.py @@ -73,7 +73,9 @@ class LrsParser(object): return CharButton(self.parsed_objects[tag.get('refobj')], None) if tag.name == 'plot': return Plot(self.parsed_objects[tag.get('refobj')], **self.attrs_to_dict(tag, ['refobj'])) - return map[tag.name](**self.attrs_to_dict(tag)) + settings = self.attrs_to_dict(tag) + settings.pop('spanstyle', '') + return map[tag.name](**settings) def process_text_element(self, tag, elem): for item in tag.contents: @@ -121,7 +123,8 @@ class LrsParser(object): for tag in self.soup.findAll('page'): page = self.parsed_objects[tag.get('objid')] self.book.append(page) - for block_tag in tag.findAll(['canvas', 'imageblock', 'textblock', 'ruledline']): + for block_tag in tag.findAll(['canvas', 'imageblock', 'textblock', + 'ruledline', 'simpletextblock']): if block_tag.name == 'ruledline': page.append(RuledLine(**self.attrs_to_dict(block_tag))) else: @@ -134,7 +137,7 @@ class LrsParser(object): self.book.append(jb) self.parsed_objects[tag.get('objid')] = jb - for tag in self.soup.findAll('textblock'): + for tag in self.soup.findAll(['textblock', 'simpletextblock']): self.process_text_block(tag) toc = 
self.soup.find('toc') if toc: @@ -145,8 +148,10 @@ class LrsParser(object): def third_pass(self): map = { - 'page' : (Page, ['pagestyle', 'evenfooterid', 'oddfooterid', 'evenheaderid', 'oddheaderid']), + 'page' : (Page, ['pagestyle', 'evenfooterid', + 'oddfooterid', 'evenheaderid', 'oddheaderid']), 'textblock' : (TextBlock, ['textstyle', 'blockstyle']), + 'simpletextblock' : (TextBlock, ['textstyle', 'blockstyle']), 'imageblock' : (ImageBlock, ['blockstyle', 'refstream']), 'image' : (Image, ['refstream']), 'canvas' : (Canvas, ['canvaswidth', 'canvasheight']), @@ -160,8 +165,12 @@ class LrsParser(object): if tag.name in map.keys(): settings = self.attrs_to_dict(tag, map[tag.name][1]+['objid', 'objlabel']) for a in ('pagestyle', 'blockstyle', 'textstyle'): - if tag.has_key(a): - settings[attrmap[a]] = self.parsed_objects[tag.get(a)] + label = tag.get(a, False) + if label: + _obj = self.parsed_objects[label] if \ + self.parsed_objects.has_key(label) else \ + self._style_labels[label] + settings[attrmap[a]] = _obj for a in ('evenfooterid', 'oddfooterid', 'evenheaderid', 'oddheaderid'): if tag.has_key(a): settings[a.replace('id', '')] = self.parsed_objects[tag.get(a)] @@ -182,6 +191,7 @@ class LrsParser(object): 'imagestream': (ImageStream, ['imagestreamlabel']), 'registfont' : (Font, []) } + self._style_labels = {} for id, tag in self.objects.items(): if tag.name in map.keys(): settings = self.attrs_to_dict(tag, map[tag.name][1]+['objid']) @@ -189,7 +199,11 @@ class LrsParser(object): for a in ('evenheaderid', 'oddheaderid', 'evenfooterid', 'oddfooterid'): if tag.has_key(a): settings[a.replace('id', '')] = self.parsed_objects[tag.get(a)] + settings.pop('autoindex', '') self.parsed_objects[id] = map[tag.name][0](**settings) + x = tag.get('stylelabel', False) + if x: + self._style_labels[x] = self.parsed_objects[id] if tag.name == 'registfont': self.book.append(self.parsed_objects[id]) @@ -220,6 +234,8 @@ class LrsParser(object): def me(base, tagname): tag = 
base.find(tagname.lower()) + if tag is None: + return ('', '', '') tag = (self.tag_to_string(tag), tag.get('reading') if tag.has_key('reading') else '') return tag diff --git a/src/calibre/ebooks/metadata/mobi.py b/src/calibre/ebooks/metadata/mobi.py index 461210befe..1506647ca8 100644 --- a/src/calibre/ebooks/metadata/mobi.py +++ b/src/calibre/ebooks/metadata/mobi.py @@ -80,6 +80,7 @@ class MetadataUpdater(object): type = self.type = data[60:68] self.nrecs, = unpack('>H', data[76:78]) record0 = self.record0 = self.record(0) + self.encryption_type, = unpack('>H', record0[12:14]) codepage, = unpack('>I', record0[28:32]) self.codec = 'utf-8' if codepage == 65001 else 'cp1252' image_base, = unpack('>I', record0[108:112]) @@ -133,6 +134,8 @@ class MetadataUpdater(object): if self.thumbnail_record is not None: recs.append((202, pack('>I', self.thumbnail_rindex))) exth = StringIO() + if getattr(self, 'encryption_type', -1) != 0: + raise MobiError('Setting metadata in DRMed MOBI files is not supported.') for code, data in recs: exth.write(pack('>II', code, len(data) + 8)) exth.write(data) diff --git a/src/calibre/ebooks/mobi/reader.py b/src/calibre/ebooks/mobi/reader.py index 6d26c81789..df728e400e 100644 --- a/src/calibre/ebooks/mobi/reader.py +++ b/src/calibre/ebooks/mobi/reader.py @@ -5,7 +5,7 @@ __copyright__ = '2008, Kovid Goyal ' Read data from .mobi files ''' -import sys, struct, os, cStringIO, re +import sys, struct, os, cStringIO, re, functools try: from PIL import Image as PILImage @@ -186,7 +186,9 @@ class MobiReader(object): self.processed_html = self.processed_html.decode(self.book_header.codec, 'ignore') for pat in ENCODING_PATS: self.processed_html = pat.sub('', self.processed_html) - self.processed_html = re.sub(r'&(\S+?);', entity_to_unicode, + e2u = functools.partial(entity_to_unicode, + exceptions=['lt', 'gt', 'amp', 'apos', 'quot']) + self.processed_html = re.sub(r'&(\S+?);', e2u, self.processed_html) self.extract_images(processed_records, output_dir) 
self.replace_page_breaks() @@ -235,7 +237,7 @@ class MobiReader(object): if self.verbose: print 'Creating OPF...' ncx = cStringIO.StringIO() - opf = self.create_opf(htmlfile, guide) + opf = self.create_opf(htmlfile, guide, root) opf.render(open(os.path.splitext(htmlfile)[0]+'.opf', 'wb'), ncx) ncx = ncx.getvalue() if ncx: @@ -328,7 +330,7 @@ class MobiReader(object): except ValueError: pass - def create_opf(self, htmlfile, guide=None): + def create_opf(self, htmlfile, guide=None, root=None): mi = self.book_header.exth.mi opf = OPFCreator(os.path.dirname(htmlfile), mi) if hasattr(self.book_header.exth, 'cover_offset'): @@ -347,21 +349,27 @@ class MobiReader(object): if ref.type.lower() == 'toc': toc = ref.href() if toc: - index = self.processed_html.find(' -1: - raw = ''+self.processed_html[index:] - root = html.fromstring(raw) + if elems: tocobj = TOC() - for a in root.xpath('//a[@href]'): - try: - text = u' '.join([t.strip() for t in a.xpath('descendant::text()')]) - except: - text = '' - text = ent_pat.sub(entity_to_unicode, text) - if a.get('href', '').startswith('#'): - tocobj.add_item(toc.partition('#')[0], a.attrib['href'][1:], text) + reached = False + for x in root.iter(): + if x == elems[-1]: + reached = True + continue + if reached and x.tag == 'a': + href = x.get('href', '') + if href: + try: + text = u' '.join([t.strip() for t in \ + x.xpath('descendant::text()')]) + except: + text = '' + text = ent_pat.sub(entity_to_unicode, text) + tocobj.add_item(toc.partition('#')[0], href[1:], + text) if tocobj is not None: opf.set_toc(tocobj) diff --git a/src/calibre/gui2/dialogs/config.py b/src/calibre/gui2/dialogs/config.py index 2b093a45b5..1d5fad960e 100644 --- a/src/calibre/gui2/dialogs/config.py +++ b/src/calibre/gui2/dialogs/config.py @@ -180,12 +180,12 @@ class ConfigDialog(QDialog, Ui_Dialog): self.toolbar_button_size.setCurrentIndex(0 if icons == self.ICON_SIZES[0] else 1 if icons == self.ICON_SIZES[1] else 2) 
self.show_toolbar_text.setChecked(config['show_text_in_toolbar']) - book_exts = sorted(BOOK_EXTENSIONS) - for ext in book_exts: + self.book_exts = sorted(BOOK_EXTENSIONS) + for ext in self.book_exts: self.single_format.addItem(ext.upper(), QVariant(ext)) single_format = config['save_to_disk_single_format'] - self.single_format.setCurrentIndex(book_exts.index(single_format)) + self.single_format.setCurrentIndex(self.book_exts.index(single_format)) self.cover_browse.setValue(config['cover_flow_queue_length']) self.systray_notifications.setChecked(not config['disable_tray_notification']) from calibre.translations.compiled import translations @@ -204,7 +204,7 @@ class ConfigDialog(QDialog, Ui_Dialog): self.pdf_metadata.setChecked(prefs['read_file_metadata']) added_html = False - for ext in book_exts: + for ext in self.book_exts: ext = ext.lower() ext = re.sub(r'(x{0,1})htm(l{0,1})', 'html', ext) if ext == 'lrf' or is_supported('book.'+ext): @@ -402,7 +402,7 @@ class ConfigDialog(QDialog, Ui_Dialog): p = {0:'normal', 1:'high', 2:'low'}[self.priority.currentIndex()] prefs['worker_process_priority'] = p prefs['read_file_metadata'] = bool(self.pdf_metadata.isChecked()) - config['save_to_disk_single_format'] = BOOK_EXTENSIONS[self.single_format.currentIndex()] + config['save_to_disk_single_format'] = self.book_exts[self.single_format.currentIndex()] config['cover_flow_queue_length'] = self.cover_browse.value() prefs['language'] = str(self.language.itemData(self.language.currentIndex()).toString()) config['systray_icon'] = self.systray_icon.checkState() == Qt.Checked diff --git a/src/calibre/gui2/dialogs/epub.py b/src/calibre/gui2/dialogs/epub.py index fb8e6bf71e..88607b1f8d 100644 --- a/src/calibre/gui2/dialogs/epub.py +++ b/src/calibre/gui2/dialogs/epub.py @@ -126,7 +126,8 @@ class Config(ResizableDialog, Ui_Dialog): pix = QPixmap() pix.loadFromData(cover) if pix.isNull(): - d = error_dialog(self.window, _file + _(" is not a valid picture")) + d = error_dialog(self.window, 
_('Error reading file'), + _file + _(" is not a valid picture")) d.exec_() else: self.cover_path.setText(_file) diff --git a/src/calibre/gui2/dialogs/lrf_single.py b/src/calibre/gui2/dialogs/lrf_single.py index 9083d3e4df..fdcf908d1d 100644 --- a/src/calibre/gui2/dialogs/lrf_single.py +++ b/src/calibre/gui2/dialogs/lrf_single.py @@ -255,7 +255,7 @@ class LRFSingleDialog(QDialog, Ui_LRFSingleDialog): self.gui_headerformat.setDisabled(True) self.gui_header_separation.setDisabled(True) self.gui_use_metadata_cover.setCheckState(Qt.Checked) - self.preprocess.addItem('No preprocessing') + self.preprocess.addItem(_('No preprocessing')) for opt in self.PREPROCESS_OPTIONS: self.preprocess.addItem(opt.get_opt_string()[2:]) ph = _('Preprocess the file before converting to LRF. This is useful if you know that the file is from a specific source. Known sources:') @@ -338,7 +338,7 @@ class LRFSingleDialog(QDialog, Ui_LRFSingleDialog): cmd.append(opt) text = qstring_to_unicode(self.preprocess.currentText()) - if text != 'No preprocessing': + if text != _('No preprocessing'): cmd.append(u'--'+text) cmd.extend([u'--profile', qstring_to_unicode(self.gui_profile.currentText())]) diff --git a/src/calibre/gui2/dialogs/mobi.py b/src/calibre/gui2/dialogs/mobi.py index 7d0324e0f4..b9cff08200 100644 --- a/src/calibre/gui2/dialogs/mobi.py +++ b/src/calibre/gui2/dialogs/mobi.py @@ -19,5 +19,4 @@ class Config(_Config): self.opt_dont_split_on_page_breaks.setVisible(False) self.opt_preserve_tag_structure.setVisible(False) self.opt_linearize_tables.setVisible(False) - self.opt_no_justification.setVisible(False) self.page_map_box.setVisible(False) \ No newline at end of file diff --git a/src/calibre/gui2/images/news/soldiers.png b/src/calibre/gui2/images/news/soldiers.png new file mode 100644 index 0000000000..df04f108e6 Binary files /dev/null and b/src/calibre/gui2/images/news/soldiers.png differ diff --git a/src/calibre/gui2/images/news/theonion.png b/src/calibre/gui2/images/news/theonion.png 
new file mode 100644 index 0000000000..d29c69562d Binary files /dev/null and b/src/calibre/gui2/images/news/theonion.png differ diff --git a/src/calibre/gui2/viewer/main.py b/src/calibre/gui2/viewer/main.py index 79c42c2a81..c6277ff902 100644 --- a/src/calibre/gui2/viewer/main.py +++ b/src/calibre/gui2/viewer/main.py @@ -194,7 +194,6 @@ class EbookViewer(MainWindow, Ui_EbookViewer): def __init__(self, pathtoebook=None): MainWindow.__init__(self, None) self.setupUi(self) - self.iterator = None self.current_page = None self.pending_search = None @@ -619,7 +618,7 @@ View an ebook. def main(args=sys.argv): parser = option_parser() args = parser.parse_args(args)[-1] - pid = os.fork() if islinux else -1 + pid = os.fork() if False and islinux else -1 if pid <= 0: app = Application(args) app.setWindowIcon(QIcon(':/images/viewer.svg')) diff --git a/src/calibre/library/cli.py b/src/calibre/library/cli.py index 164f865dca..ceae3094c4 100644 --- a/src/calibre/library/cli.py +++ b/src/calibre/library/cli.py @@ -19,7 +19,7 @@ except: send_message = None from calibre.ebooks.metadata.meta import get_metadata from calibre.library.database2 import LibraryDatabase2 -from calibre.ebooks.metadata.opf import OPFCreator, OPFReader +from calibre.ebooks.metadata.opf2 import OPFCreator, OPF from calibre.utils.genshi.template import MarkupTemplate FIELDS = set(['title', 'authors', 'author_sort', 'publisher', 'rating', 'timestamp', 'size', 'tags', 'comments', 'series', 'series_index', 'formats', 'isbn', 'cover']) @@ -453,7 +453,7 @@ id is an id number from the list command. 
return 0 def do_set_metadata(db, id, stream): - mi = OPFReader(stream) + mi = OPF(stream) db.set_metadata(id, mi) do_show_metadata(db, id, False) if send_message is not None: diff --git a/src/calibre/linux.py b/src/calibre/linux.py index d960ef87f7..2bd1544e56 100644 --- a/src/calibre/linux.py +++ b/src/calibre/linux.py @@ -435,17 +435,16 @@ def post_install(): parser = option_parser() opts = parser.parse_args()[0] - if not opts.no_root and os.geteuid() != 0: - print >> sys.stderr, 'You must be root to run this command.' - sys.exit(1) - global use_destdir use_destdir = opts.destdir manifest = [] - manifest += setup_udev_rules(opts.group_file, not opts.dont_reload, opts.fatal_errors) - manifest += setup_completion(opts.fatal_errors) setup_desktop_integration(opts.fatal_errors) - manifest += install_man_pages(opts.fatal_errors) + if opts.no_root or os.geteuid() == 0: + manifest += setup_udev_rules(opts.group_file, not opts.dont_reload, opts.fatal_errors) + manifest += setup_completion(opts.fatal_errors) + manifest += install_man_pages(opts.fatal_errors) + else: + print "Skipping udev, completion, and man-page install for non-root user." 
try: from PyQt4 import Qt diff --git a/src/calibre/trac/plugins/download.py b/src/calibre/trac/plugins/download.py index 8a5a81ac7f..9c852c554e 100644 --- a/src/calibre/trac/plugins/download.py +++ b/src/calibre/trac/plugins/download.py @@ -15,7 +15,7 @@ DEPENDENCIES = [ ('ImageMagick', '6.3.5', 'imagemagick', 'imagemagick', 'ImageMagick'), ('xdg-utils', '1.0.2', 'xdg-utils', 'xdg-utils', 'xdg-utils'), ('dbus-python', '0.82.2', 'dbus-python', 'python-dbus', 'dbus-python'), - ('lxml', '2.0.5', 'lxml', 'python-lxml', 'python-lxml'), + ('lxml', '2.1.5', 'lxml', 'python-lxml', 'python-lxml'), ('python-dateutil', '1.4.1', 'python-dateutil', 'python-dateutil', 'python-dateutil'), ('BeautifulSoup', '3.0.5', 'beautifulsoup', 'python-beautifulsoup', 'python-BeautifulSoup'), ('help2man', '1.36.4', 'help2man', 'help2man', 'help2man'), diff --git a/src/calibre/web/feeds/recipes/__init__.py b/src/calibre/web/feeds/recipes/__init__.py index 7ae997f90d..6018af4918 100644 --- a/src/calibre/web/feeds/recipes/__init__.py +++ b/src/calibre/web/feeds/recipes/__init__.py @@ -32,6 +32,7 @@ recipe_modules = ['recipe_' + r for r in ( 'hindu', 'cincinnati_enquirer', 'physics_world', 'pressonline', 'la_republica', 'physics_today', 'chicago_tribune', 'e_novine', 'al_jazeera', 'winsupersite', 'borba', 'courrierinternational', + 'lamujerdemivida', 'soldiers', 'theonion', )] import re, imp, inspect, time, os
diff --git a/src/calibre/web/feeds/recipes/recipe_lamujerdemivida.py b/src/calibre/web/feeds/recipes/recipe_lamujerdemivida.py
new file mode 100644
index 0000000000..a99be8f955
--- /dev/null
+++ b/src/calibre/web/feeds/recipes/recipe_lamujerdemivida.py
@@ -0,0 +1,94 @@
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = '2009, Darko Miletic '
+'''
+lamujerdemivida.com.ar
+'''
+from calibre import strftime
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class LaMujerDeMiVida(BasicNewsRecipe):
+    """Recipe for lamujerdemivida.com.ar.
+
+    Articles are collected by scraping the pages returned by get_feeds()
+    (see parse_index) instead of parsing them as RSS/Atom feeds.
+    """
+    title = 'La Mujer de mi Vida'
+    __author__ = 'Darko Miletic'
+    description = 'Cultura de otra manera'
+    oldest_article = 90
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    use_embedded_content = False
+    encoding = 'cp1252'
+    publisher = 'La Mujer de mi Vida'
+    category = 'literatura, critica, arte, ensayos'
+    language = _('Spanish')
+    INDEX = 'http://www.lamujerdemivida.com.ar/'
+    html2lrf_options = [
+                          '--comment', description
+                        , '--category', category
+                        , '--publisher', publisher
+                        , '--ignore-tables'
+                        ]
+
+    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
+
+    keep_only_tags = [dict(name='table', attrs={'width':'570'})]
+
+    # Not consumed as RSS: parse_index() scrapes this page for article links.
+    feeds = [(u'Articulos', u'http://www.lamujerdemivida.com.ar/index.php')]
+
+    def preprocess_html(self, soup):
+        """Mark the page as Argentine Spanish and strip inline styles.
+
+        The soup is modified in place and returned.
+        """
+        soup.html['xml:lang'] = 'es-AR'
+        soup.html['lang'] = 'es-AR'
+        # Declare the language in <head> too, matching the attributes set
+        # on <html> above.
+        mtag = '<meta http-equiv="Content-Language" content="es-AR"/>'
+        soup.head.insert(0, mtag)
+        for item in soup.findAll(style=True):
+            del item['style']
+        return soup
+
+    def get_cover_url(self):
+        """Return the cover image URL found on INDEX, or None.
+
+        The cover is the <img> whose alt text is 'Lamujerdemivida.'; its
+        src is appended to INDEX, so it is assumed to be site-relative.
+        """
+        cover_url = None
+        soup = self.index_to_soup(self.INDEX)
+        cover_item = soup.find('img', attrs={'alt':'Lamujerdemivida.'})
+        if cover_item:
+            cover_url = self.INDEX + cover_item['src']
+        return cover_url
+
+    def parse_index(self):
+        """Build the article index by scraping each page in get_feeds().
+
+        Returns a list of (feed title, articles) tuples; each article is a
+        dict with 'title', 'date', 'url' and 'description' keys. Every
+        <td width="390"> cell that contains a link yields one article.
+        """
+        totalfeeds = []
+        for feedtitle, feedurl in self.get_feeds():
+            self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
+            articles = []
+            soup = self.index_to_soup(feedurl)
+            for item in soup.findAll('td', attrs={'width':'390'}):
+                atag = item.find('a', href=True)
+                if atag:
+                    articles.append({
+                                      'title'      :self.tag_to_string(atag)
+                                     ,'date'       :strftime(self.timefmt)
+                                     ,'url'        :atag['href']
+                                     ,'description':''
+                                    })
+            totalfeeds.append((feedtitle, articles))
+        return totalfeeds
+
diff --git a/src/calibre/web/feeds/recipes/recipe_soldiers.py b/src/calibre/web/feeds/recipes/recipe_soldiers.py
new file mode 100644
index 0000000000..dfaa070928
--- /dev/null
+++
b/src/calibre/web/feeds/recipes/recipe_soldiers.py
@@ -0,0 +1,58 @@
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = '2009, Darko Miletic '
+'''
+www.army.mil/soldiers/
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Soldiers(BasicNewsRecipe):
+    """Recipe for Soldiers, 'The Official U.S. Army Magazine' (www.army.mil/soldiers/)."""
+    title = 'Soldiers'
+    __author__ = 'Darko Miletic'
+    description = 'The Official U.S. Army Magazine'
+    publisher = 'U.S. Army'
+    category = 'news, politics, war, weapons'
+    language = _('English')
+    encoding = 'utf-8'
+    INDEX = 'http://www.army.mil/soldiers/'
+    oldest_article = 30
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    use_embedded_content = False
+    remove_javascript = True
+    simultaneous_downloads = 1
+    delay = 4
+    max_connections = 1
+
+    feeds = [(u'Frontpage', u'http://www.army.mil/rss/feeds/soldiersfrontpage.xml')]
+
+    keep_only_tags = [{'name': 'div', 'attrs': {'id': 'rightCol'}}]
+    remove_tags = [
+        {'name': 'div', 'attrs': {'id': ['addThis', 'comment', 'articleFooter']}},
+        {'name': ['object', 'link']},
+    ]
+
+    html2lrf_options = [
+        '--comment', description,
+        '--category', category,
+        '--publisher', publisher,
+    ]
+    html2epub_options = 'publisher="%s"\ncomments="%s"\ntags="%s"' % (
+        publisher, description, category)
+
+    def preprocess_html(self, soup):
+        """Strip every inline style attribute from the soup and return it."""
+        for styled in soup.findAll(style=True):
+            del styled['style']
+        return soup
+
+    def get_cover_url(self):
+        """Return the src of the 'Current Magazine Cover' image on INDEX, or None."""
+        page = self.index_to_soup(self.INDEX)
+        cover = page.find('img', attrs={'alt': 'Current Magazine Cover'})
+        if not cover:
+            return None
+        return cover['src']
diff --git a/src/calibre/web/feeds/recipes/recipe_theonion.py b/src/calibre/web/feeds/recipes/recipe_theonion.py
new file mode 100644
index 0000000000..06f7edd32b
--- /dev/null
+++ b/src/calibre/web/feeds/recipes/recipe_theonion.py
@@ -0,0 +1,45 @@
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = '2009, Darko Miletic '
+
+'''
+theonion.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class TheOnion(BasicNewsRecipe):
+    """Recipe for The Onion (theonion.com)."""
+    title = 'The Onion'
+    __author__ = 'Darko Miletic'
+    description = "America's finest news source"
+    publisher = u'Onion, Inc.'
+    category = u'humor, news, USA'
+    language = _('English')
+    encoding = 'utf-8'
+    oldest_article = 2
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    use_embedded_content = False
+    remove_javascript = True
+
+    feeds = [
+        (u'Daily', u'http://feeds.theonion.com/theonion/daily'),
+        (u'Sports', u'http://feeds.theonion.com/theonion/sports'),
+    ]
+
+    # Keep only <div id="main">, then drop the elements listed in remove_tags.
+    keep_only_tags = [{'name': 'div', 'attrs': {'id': 'main'}}]
+    remove_tags = [
+        {'name': ['object', 'link', 'iframe', 'base']},
+        {'name': 'div', 'attrs': {'class': ['toolbar_side', 'graphical_feature', 'toolbar_bottom']}},
+        {'name': 'div', 'attrs': {'id': ['recent_slider', 'sidebar', 'pagination', 'related_media']}},
+    ]
+
+    # Publisher, description and category handed to the LRF and EPUB converters.
+    html2lrf_options = ['--comment', description, '--category', category, '--publisher', publisher]
+    html2epub_options = (
+        'publisher="%s"\ncomments="%s"\ntags="%s"'
+        % (publisher, description, category)
+    )