mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Pull from trunk
This commit is contained in:
commit
549e2b9efb
@ -1,50 +0,0 @@
|
||||
[epydoc] # Epydoc section marker (required by ConfigParser)
|
||||
|
||||
# Information about the project.
|
||||
name: calibre
|
||||
url: http://calibre.kovidgoyal.net
|
||||
|
||||
# The list of modules to document. Modules can be named using
|
||||
# dotted names, module filenames, or package directory names.
|
||||
# This option may be repeated.
|
||||
modules: calibre.devices, calibre.ebooks.lrf.web.profiles
|
||||
|
||||
output: pdf
|
||||
target: docs/pdf
|
||||
|
||||
frames: no
|
||||
|
||||
# graph
|
||||
# The list of graph types that should be automatically included
|
||||
# in the output. Graphs are generated using the Graphviz "dot"
|
||||
# executable. Graph types include: "classtree", "callgraph",
|
||||
# "umlclass". Use "all" to include all graph types
|
||||
graph: classtree
|
||||
|
||||
# css
|
||||
# The CSS stylesheet for HTML output. Can be the name of a builtin
|
||||
# stylesheet, or the name of a file.
|
||||
css: white
|
||||
|
||||
# link
|
||||
# HTML code for the project link in the navigation bar. If left
|
||||
# unspecified, the project link will be generated based on the
|
||||
# project's name and URL.
|
||||
#link: <a href="somewhere">My Cool Project</a>
|
||||
|
||||
# top
|
||||
# The "top" page for the documentation. Can be a URL, the name
|
||||
# of a module or class, or one of the special names "trees.html",
|
||||
# "indices.html", or "help.html"
|
||||
# top: calibre
|
||||
|
||||
# verbosity
|
||||
# An integer indicating how verbose epydoc should be. The default
|
||||
# value is 0; negative values will suppress warnings and errors;
|
||||
# positive values will give more verbose output.
|
||||
#verbosity: 0
|
||||
|
||||
# separate-classes
|
||||
# Whether each class should be listed in its own section when
|
||||
# generating LaTeX or PDF output.
|
||||
#separate-classes: no
|
51
epydoc.conf
51
epydoc.conf
@ -1,51 +0,0 @@
|
||||
[epydoc] # Epydoc section marker (required by ConfigParser)
|
||||
|
||||
# Information about the project.
|
||||
name: calibre - API documentation
|
||||
url: http://calibre.kovidgoyal.net
|
||||
|
||||
# The list of modules to document. Modules can be named using
|
||||
# dotted names, module filenames, or package directory names.
|
||||
# This option may be repeated.
|
||||
modules: calibre.devices, calibre.ebooks.lrf.web.profiles
|
||||
|
||||
# Write html output to the directory "docs/html"
|
||||
output: html
|
||||
target: docs/html
|
||||
|
||||
frames: no
|
||||
|
||||
# graph
|
||||
# The list of graph types that should be automatically included
|
||||
# in the output. Graphs are generated using the Graphviz "dot"
|
||||
# executable. Graph types include: "classtree", "callgraph",
|
||||
# "umlclass". Use "all" to include all graph types
|
||||
graph: classtree
|
||||
|
||||
# css
|
||||
# The CSS stylesheet for HTML output. Can be the name of a builtin
|
||||
# stylesheet, or the name of a file.
|
||||
css: white
|
||||
|
||||
# link
|
||||
# HTML code for the project link in the navigation bar. If left
|
||||
# unspecified, the project link will be generated based on the
|
||||
# project's name and URL.
|
||||
link: <a href="http://calibre.kovidgoyal.net">calibre</a>
|
||||
|
||||
# top
|
||||
# The "top" page for the documentation. Can be a URL, the name
|
||||
# of a module or class, or one of the special names "trees.html",
|
||||
# "indices.html", or "help.html"
|
||||
#top:
|
||||
|
||||
# verbosity
|
||||
# An integer indicating how verbose epydoc should be. The default
|
||||
# value is 0; negative values will suppress warnings and errors;
|
||||
# positive values will give more verbose output.
|
||||
#verbosity: 0
|
||||
|
||||
# separate-classes
|
||||
# Whether each class should be listed in its own section when
|
||||
# generating LaTeX or PDF output.
|
||||
#separate-classes: no
|
602
pylint.conf
602
pylint.conf
@ -1,602 +0,0 @@
|
||||
# lint Python modules using external checkers.
|
||||
|
||||
#
|
||||
|
||||
# This is the main checker controlling the other ones and the reports
|
||||
|
||||
# generation. It is itself both a raw checker and an astng checker in order
|
||||
|
||||
# to:
|
||||
|
||||
# * handle message activation / deactivation at the module level
|
||||
|
||||
# * handle some basic but necessary stats data (number of classes, methods...)
|
||||
|
||||
#
|
||||
|
||||
[MASTER]
|
||||
|
||||
|
||||
|
||||
# Specify a configuration file.
|
||||
|
||||
#rcfile=
|
||||
|
||||
|
||||
|
||||
# Profiled execution.
|
||||
|
||||
profile=no
|
||||
|
||||
|
||||
|
||||
# Add <file or directory> to the black list. It should be a base name, not a
|
||||
|
||||
# path. You may set this option multiple times.
|
||||
|
||||
ignore=CVS
|
||||
|
||||
|
||||
|
||||
# Pickle collected data for later comparisons.
|
||||
|
||||
persistent=yes
|
||||
|
||||
|
||||
|
||||
# Set the cache size for astng objects.
|
||||
|
||||
cache-size=500
|
||||
|
||||
|
||||
|
||||
# List of plugins (as comma separated values of python modules names) to load,
|
||||
|
||||
# usually to register additional checkers.
|
||||
|
||||
load-plugins=
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
[MESSAGES CONTROL]
|
||||
|
||||
|
||||
|
||||
# Enable only checker(s) with the given id(s). This option conflicts with the
|
||||
|
||||
# disable-checker option
|
||||
|
||||
#enable-checker=
|
||||
|
||||
|
||||
|
||||
# Enable all checker(s) except those with the given id(s). This option conflicts
|
||||
|
||||
# with the enable-checker option
|
||||
|
||||
#disable-checker=
|
||||
|
||||
|
||||
|
||||
# Enable all messages in the listed categories.
|
||||
|
||||
#enable-msg-cat=
|
||||
|
||||
|
||||
|
||||
# Disable all messages in the listed categories.
|
||||
|
||||
#disable-msg-cat=
|
||||
|
||||
|
||||
|
||||
# Enable the message(s) with the given id(s).
|
||||
|
||||
#enable-msg=
|
||||
|
||||
|
||||
|
||||
# Disable the message(s) with the given id(s).
|
||||
|
||||
#disable-msg=
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
[REPORTS]
|
||||
|
||||
|
||||
|
||||
# set the output format. Available formats are text, parseable, colorized and
|
||||
|
||||
# html
|
||||
|
||||
output-format=colorized
|
||||
|
||||
|
||||
|
||||
# Include message's id in output
|
||||
|
||||
include-ids=no
|
||||
|
||||
|
||||
|
||||
# Put messages in a separate file for each module / package specified on the
|
||||
|
||||
# command line instead of printing them on stdout. Reports (if any) will be
|
||||
|
||||
# written in a file name "pylint_global.[txt|html]".
|
||||
|
||||
files-output=no
|
||||
|
||||
|
||||
|
||||
# Tells whether to display a full report or only the messages
|
||||
|
||||
reports=yes
|
||||
|
||||
|
||||
|
||||
# Python expression which should return a note less than 10 (10 is the highest
|
||||
|
||||
# note). You have access to the variables error, warning, statement which
|
||||
|
||||
# respectively contain the number of errors / warnings messages and the total
|
||||
|
||||
# number of statements analyzed. This is used by the global evaluation report
|
||||
|
||||
# (R0004).
|
||||
|
||||
evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)
|
||||
|
||||
|
||||
|
||||
# Add a comment according to your evaluation note. This is used by the global
|
||||
|
||||
# evaluation report (R0004).
|
||||
|
||||
comment=no
|
||||
|
||||
|
||||
|
||||
# Enable the report(s) with the given id(s).
|
||||
|
||||
#enable-report=
|
||||
|
||||
|
||||
|
||||
# Disable the report(s) with the given id(s).
|
||||
|
||||
#disable-report=
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# checks for
|
||||
|
||||
# * unused variables / imports
|
||||
|
||||
# * undefined variables
|
||||
|
||||
# * redefinition of variable from builtins or from an outer scope
|
||||
|
||||
# * use of variable before assignment
|
||||
|
||||
#
|
||||
|
||||
[VARIABLES]
|
||||
|
||||
|
||||
|
||||
# Tells whether we should check for unused import in __init__ files.
|
||||
|
||||
init-import=no
|
||||
|
||||
|
||||
|
||||
# A regular expression matching names used for dummy variables (i.e. not used).
|
||||
|
||||
dummy-variables-rgx=_|dummy
|
||||
|
||||
|
||||
|
||||
# List of additional names supposed to be defined in builtins. Remember that
|
||||
|
||||
# you should avoid defining new builtins when possible.
|
||||
|
||||
additional-builtins=
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# try to find bugs in the code using type inference
|
||||
|
||||
#
|
||||
|
||||
[TYPECHECK]
|
||||
|
||||
|
||||
|
||||
# Tells whether missing members accessed in mixin class should be ignored. A
|
||||
|
||||
# mixin class is detected if its name ends with "mixin" (case insensitive).
|
||||
|
||||
ignore-mixin-members=yes
|
||||
|
||||
|
||||
|
||||
# When zope mode is activated, consider the acquired-members option to ignore
|
||||
|
||||
# access to some undefined attributes.
|
||||
|
||||
zope=no
|
||||
|
||||
|
||||
|
||||
# List of members which are usually obtained through zope's acquisition mechanism and
|
||||
|
||||
# so shouldn't trigger E0201 when accessed (need zope=yes to be considered).
|
||||
|
||||
acquired-members=REQUEST,acl_users,aq_parent
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# checks for :
|
||||
|
||||
# * doc strings
|
||||
|
||||
# * modules / classes / functions / methods / arguments / variables name
|
||||
|
||||
# * number of arguments, local variables, branches, returns and statements in
|
||||
|
||||
# functions, methods
|
||||
|
||||
# * required module attributes
|
||||
|
||||
# * dangerous default values as arguments
|
||||
|
||||
# * redefinition of function / method / class
|
||||
|
||||
# * uses of the global statement
|
||||
|
||||
#
|
||||
|
||||
[BASIC]
|
||||
|
||||
|
||||
|
||||
# Required attributes for module, separated by a comma
|
||||
|
||||
required-attributes=
|
||||
|
||||
|
||||
|
||||
# Regular expression which should only match function or class names which do
|
||||
|
||||
# not require a docstring
|
||||
|
||||
no-docstring-rgx=__.*__
|
||||
|
||||
|
||||
|
||||
# Regular expression which should only match correct module names
|
||||
|
||||
module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$
|
||||
|
||||
|
||||
|
||||
# Regular expression which should only match correct module level names
|
||||
|
||||
const-rgx=(([A-Z_][A-Z1-9_]*)|(__.*__))$
|
||||
|
||||
|
||||
|
||||
# Regular expression which should only match correct class names
|
||||
|
||||
class-rgx=[A-Z_][a-zA-Z0-9]+$
|
||||
|
||||
|
||||
|
||||
# Regular expression which should only match correct function names
|
||||
|
||||
function-rgx=[a-z_][a-z0-9_]{2,30}$
|
||||
|
||||
|
||||
|
||||
# Regular expression which should only match correct method names
|
||||
|
||||
method-rgx=[a-z_][a-z0-9_]{2,30}$
|
||||
|
||||
|
||||
|
||||
# Regular expression which should only match correct instance attribute names
|
||||
|
||||
attr-rgx=[a-z_][a-z0-9_]{2,30}$
|
||||
|
||||
|
||||
|
||||
# Regular expression which should only match correct argument names
|
||||
|
||||
argument-rgx=[a-z_][a-z0-9_]{2,30}$
|
||||
|
||||
|
||||
|
||||
# Regular expression which should only match correct variable names
|
||||
|
||||
variable-rgx=[a-z_][a-z0-9_]{2,30}$
|
||||
|
||||
|
||||
|
||||
# Regular expression which should only match correct list comprehension /
|
||||
|
||||
# generator expression variable names
|
||||
|
||||
inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$
|
||||
|
||||
|
||||
|
||||
# Good variable names which should always be accepted, separated by a comma
|
||||
|
||||
good-names=i,j,k,ex,Run,_
|
||||
|
||||
|
||||
|
||||
# Bad variable names which should always be refused, separated by a comma
|
||||
|
||||
bad-names=foo,bar,baz,toto,tutu,tata
|
||||
|
||||
|
||||
|
||||
# List of builtin function names that should not be used, separated by a comma
|
||||
|
||||
bad-functions=map,filter,apply,input
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# checks for signs of poor design / misdesign:
|
||||
|
||||
# * number of methods, attributes, local variables...
|
||||
|
||||
# * size, complexity of functions, methods
|
||||
|
||||
#
|
||||
|
||||
[DESIGN]
|
||||
|
||||
|
||||
|
||||
# Maximum number of arguments for function / method
|
||||
|
||||
max-args=5
|
||||
|
||||
|
||||
|
||||
# Maximum number of locals for function / method body
|
||||
|
||||
max-locals=15
|
||||
|
||||
|
||||
|
||||
# Maximum number of return / yield for function / method body
|
||||
|
||||
max-returns=6
|
||||
|
||||
|
||||
|
||||
# Maximum number of branches for function / method body
|
||||
|
||||
max-branchs=12
|
||||
|
||||
|
||||
|
||||
# Maximum number of statements in function / method body
|
||||
|
||||
max-statements=50
|
||||
|
||||
|
||||
|
||||
# Maximum number of parents for a class (see R0901).
|
||||
|
||||
max-parents=7
|
||||
|
||||
|
||||
|
||||
# Maximum number of attributes for a class (see R0902).
|
||||
|
||||
max-attributes=7
|
||||
|
||||
|
||||
|
||||
# Minimum number of public methods for a class (see R0903).
|
||||
|
||||
min-public-methods=2
|
||||
|
||||
|
||||
|
||||
# Maximum number of public methods for a class (see R0904).
|
||||
|
||||
max-public-methods=20
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# checks for
|
||||
|
||||
# * external modules dependencies
|
||||
|
||||
# * relative / wildcard imports
|
||||
|
||||
# * cyclic imports
|
||||
|
||||
# * uses of deprecated modules
|
||||
|
||||
#
|
||||
|
||||
[IMPORTS]
|
||||
|
||||
|
||||
|
||||
# Deprecated modules which should not be used, separated by a comma
|
||||
|
||||
deprecated-modules=regsub,string,TERMIOS,Bastion,rexec
|
||||
|
||||
|
||||
|
||||
# Create a graph of every (i.e. internal and external) dependencies in the
|
||||
|
||||
# given file (report R0402 must not be disabled)
|
||||
|
||||
import-graph=
|
||||
|
||||
|
||||
|
||||
# Create a graph of external dependencies in the given file (report R0402 must
|
||||
|
||||
# not be disabled)
|
||||
|
||||
ext-import-graph=
|
||||
|
||||
|
||||
|
||||
# Create a graph of internal dependencies in the given file (report R0402 must
|
||||
|
||||
# not be disabled)
|
||||
|
||||
int-import-graph=
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# checks for :
|
||||
|
||||
# * methods without self as first argument
|
||||
|
||||
# * overridden methods signature
|
||||
|
||||
# * access only to existent members via self
|
||||
|
||||
# * attributes not defined in the __init__ method
|
||||
|
||||
# * supported interfaces implementation
|
||||
|
||||
# * unreachable code
|
||||
|
||||
#
|
||||
|
||||
[CLASSES]
|
||||
|
||||
|
||||
|
||||
# List of interface methods to ignore, separated by a comma. This is used for
|
||||
|
||||
# instance to not check methods defined in Zope's Interface base class.
|
||||
|
||||
ignore-iface-methods=isImplementedBy,deferred,extends,names,namesAndDescriptions,queryDescriptionFor,getBases,getDescriptionFor,getDoc,getName,getTaggedValue,getTaggedValueTags,isEqualOrExtendedBy,setTaggedValue,isImplementedByInstancesOf,adaptWith,is_implemented_by
|
||||
|
||||
|
||||
|
||||
# List of method names used to declare (i.e. assign) instance attributes.
|
||||
|
||||
defining-attr-methods=__init__,__new__,setUp
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# checks for similarities and duplicated code. This computation may be
|
||||
|
||||
# memory / CPU intensive, so you should disable it if you experience some
|
||||
|
||||
# problems.
|
||||
|
||||
#
|
||||
|
||||
[SIMILARITIES]
|
||||
|
||||
|
||||
|
||||
# Minimum lines number of a similarity.
|
||||
|
||||
min-similarity-lines=4
|
||||
|
||||
|
||||
|
||||
# Ignore comments when computing similarities.
|
||||
|
||||
ignore-comments=yes
|
||||
|
||||
|
||||
|
||||
# Ignore docstrings when computing similarities.
|
||||
|
||||
ignore-docstrings=yes
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# checks for:
|
||||
|
||||
# * warning notes in the code like FIXME, XXX
|
||||
|
||||
# * PEP 263: source code with non ascii character but no encoding declaration
|
||||
|
||||
#
|
||||
|
||||
[MISCELLANEOUS]
|
||||
|
||||
|
||||
|
||||
# List of note tags to take into consideration, separated by a comma.
|
||||
|
||||
notes=FIXME,XXX,TODO
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# checks for :
|
||||
|
||||
# * unauthorized constructions
|
||||
|
||||
# * strict indentation
|
||||
|
||||
# * line length
|
||||
|
||||
# * use of <> instead of !=
|
||||
|
||||
#
|
||||
|
||||
[FORMAT]
|
||||
|
||||
|
||||
|
||||
# Maximum number of characters on a single line.
|
||||
|
||||
max-line-length=80
|
||||
|
||||
|
||||
|
||||
# Maximum number of lines in a module
|
||||
|
||||
max-module-lines=1000
|
||||
|
||||
|
||||
|
||||
# String used as indentation unit. This is usually "    " (4 spaces) or "\t" (1
|
||||
|
||||
# tab).
|
||||
|
||||
indent-string=' '
|
||||
|
@ -2,7 +2,7 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
__appname__ = 'calibre'
|
||||
__version__ = '0.4.140'
|
||||
__version__ = '0.4.141'
|
||||
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
|
||||
'''
|
||||
Various run time constants.
|
||||
|
@ -12,7 +12,8 @@ def devices():
|
||||
from calibre.devices.cybookg3.driver import CYBOOKG3
|
||||
from calibre.devices.kindle.driver import KINDLE
|
||||
from calibre.devices.kindle.driver import KINDLE2
|
||||
return (PRS500, PRS505, PRS700, CYBOOKG3, KINDLE, KINDLE2)
|
||||
from calibre.devices.blackberry.driver import BLACKBERRY
|
||||
return (PRS500, PRS505, PRS700, CYBOOKG3, KINDLE, KINDLE2, BLACKBERRY)
|
||||
|
||||
import time
|
||||
|
||||
|
6
src/calibre/devices/blackberry/__init__.py
Normal file
6
src/calibre/devices/blackberry/__init__.py
Normal file
@ -0,0 +1,6 @@
|
||||
from __future__ import with_statement
|
||||
__license__ = 'GPL 3'
|
||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
|
30
src/calibre/devices/blackberry/driver.py
Normal file
30
src/calibre/devices/blackberry/driver.py
Normal file
@ -0,0 +1,30 @@
|
||||
from __future__ import with_statement
|
||||
__license__ = 'GPL 3'
|
||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
|
||||
from calibre.devices.usbms.driver import USBMS
|
||||
|
||||
class BLACKBERRY(USBMS):
|
||||
# Ordered list of supported formats
|
||||
FORMATS = ['mobi', 'prc']
|
||||
|
||||
VENDOR_ID = [0x0fca]
|
||||
PRODUCT_ID = [0x8004]
|
||||
BCD = [0x0200]
|
||||
|
||||
VENDOR_NAME = 'RIM'
|
||||
WINDOWS_MAIN_MEM = 'BLACKBERRY_SD'
|
||||
#WINDOWS_CARD_MEM = 'CARD_STORAGE'
|
||||
|
||||
#OSX_MAIN_MEM = 'Kindle Internal Storage Media'
|
||||
#OSX_CARD_MEM = 'Kindle Card Storage Media'
|
||||
|
||||
MAIN_MEMORY_VOLUME_LABEL = 'Blackberry Main Memory'
|
||||
#STORAGE_CARD_VOLUME_LABEL = 'Kindle Storage Card'
|
||||
|
||||
EBOOK_DIR_MAIN = 'ebooks'
|
||||
#EBOOK_DIR_CARD = "documents"
|
||||
SUPPORTS_SUB_DIRS = True
|
||||
|
@ -1,3 +1,4 @@
|
||||
from __future__ import with_statement
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, John Schember <john at nachtimwald.com>'
|
||||
'''
|
||||
@ -165,8 +166,8 @@ class USBMS(Device):
|
||||
|
||||
def get_file(self, path, outfile, end_session=True):
|
||||
path = self.munge_path(path)
|
||||
src = open(path, 'rb')
|
||||
shutil.copyfileobj(src, outfile, 10*1024*1024)
|
||||
with open(path, 'rb') as src:
|
||||
shutil.copyfileobj(src, outfile, 10*1024*1024)
|
||||
|
||||
def put_file(self, infile, path, replace_file=False, end_session=True):
|
||||
path = self.munge_path(path)
|
||||
|
@ -30,12 +30,50 @@ def detect(aBuf):
|
||||
|
||||
# Added by Kovid
|
||||
ENCODING_PATS = [
|
||||
re.compile(r'<\?[^<>]+encoding=[\'"](.*?)[\'"][^<>]*>', re.IGNORECASE),
|
||||
re.compile(r'<meta.*?content=[\'"].*?charset=([^\s\'"]+).*?[\'"].*?>', re.IGNORECASE)
|
||||
re.compile(r'<\?[^<>]+encoding=[\'"](.*?)[\'"][^<>]*>',
|
||||
re.IGNORECASE),
|
||||
re.compile(r'<meta.*?content=[\'"].*?charset=([^\s\'"]+).*?[\'"].*?>',
|
||||
re.IGNORECASE)
|
||||
]
|
||||
ENTITY_PATTERN = re.compile(r'&(\S+?);')
|
||||
|
||||
def xml_to_unicode(raw, verbose=False, strip_encoding_pats=False, resolve_entities=False):
|
||||
def strip_encoding_declarations(raw):
|
||||
for pat in ENCODING_PATS:
|
||||
raw = pat.sub('', raw)
|
||||
return raw
|
||||
|
||||
def substitute_entites(raw):
|
||||
from calibre import entity_to_unicode
|
||||
from functools import partial
|
||||
f = partial(entity_to_unicode, exceptions=
|
||||
['amp', 'apos', 'quot', 'lt', 'gt'])
|
||||
return ENTITY_PATTERN.sub(f, raw)
|
||||
|
||||
_CHARSET_ALIASES = { "macintosh" : "mac-roman",
|
||||
"x-sjis" : "shift-jis" }
|
||||
|
||||
|
||||
def force_encoding(raw, verbose):
|
||||
from calibre.constants import preferred_encoding
|
||||
try:
|
||||
chardet = detect(raw)
|
||||
except:
|
||||
chardet = {'encoding':preferred_encoding, 'confidence':0}
|
||||
encoding = chardet['encoding']
|
||||
if chardet['confidence'] < 1 and verbose:
|
||||
print 'WARNING: Encoding detection confidence %d%%'%(chardet['confidence']*100)
|
||||
if not encoding:
|
||||
encoding = preferred_encoding
|
||||
encoding = encoding.lower()
|
||||
if _CHARSET_ALIASES.has_key(encoding):
|
||||
encoding = _CHARSET_ALIASES[encoding]
|
||||
if encoding == 'ascii':
|
||||
encoding = 'utf-8'
|
||||
return encoding
|
||||
|
||||
|
||||
def xml_to_unicode(raw, verbose=False, strip_encoding_pats=False,
|
||||
resolve_entities=False):
|
||||
'''
|
||||
Force conversion of byte string to unicode. Tries to look for XML/HTML
|
||||
encoding declaration first, if not found uses the chardet library and
|
||||
@ -45,44 +83,27 @@ def xml_to_unicode(raw, verbose=False, strip_encoding_pats=False, resolve_entiti
|
||||
encoding = None
|
||||
if not raw:
|
||||
return u'', encoding
|
||||
if isinstance(raw, unicode):
|
||||
return raw, encoding
|
||||
for pat in ENCODING_PATS:
|
||||
match = pat.search(raw)
|
||||
if match:
|
||||
encoding = match.group(1)
|
||||
break
|
||||
if strip_encoding_pats:
|
||||
if not isinstance(raw, unicode):
|
||||
if raw.startswith('\xff\xfe'):
|
||||
raw, encoding = raw.decode('utf-16-le')[1:], 'utf-16-le'
|
||||
elif raw.startswith('\xfe\xff'):
|
||||
raw, encoding = raw.decode('utf-16-be')[1:], 'utf-16-be'
|
||||
if not isinstance(raw, unicode):
|
||||
for pat in ENCODING_PATS:
|
||||
raw = pat.sub('', raw)
|
||||
if encoding is None:
|
||||
match = pat.search(raw)
|
||||
if match:
|
||||
encoding = match.group(1)
|
||||
break
|
||||
if encoding is None:
|
||||
encoding = force_encoding(raw, verbose)
|
||||
try:
|
||||
chardet = detect(raw)
|
||||
except:
|
||||
chardet = {'encoding':'utf-8', 'confidence':0}
|
||||
encoding = chardet['encoding']
|
||||
if chardet['confidence'] < 1 and verbose:
|
||||
print 'WARNING: Encoding detection confidence %d%%'%(chardet['confidence']*100)
|
||||
CHARSET_ALIASES = { "macintosh" : "mac-roman",
|
||||
"x-sjis" : "shift-jis" }
|
||||
if not encoding:
|
||||
from calibre import preferred_encoding
|
||||
encoding = preferred_encoding
|
||||
if encoding:
|
||||
encoding = encoding.lower()
|
||||
if CHARSET_ALIASES.has_key(encoding):
|
||||
encoding = CHARSET_ALIASES[encoding]
|
||||
if encoding == 'ascii':
|
||||
encoding = 'utf-8'
|
||||
raw = raw.decode(encoding, 'replace')
|
||||
except LookupError:
|
||||
raw = raw.decode('utf-8', 'replace')
|
||||
|
||||
try:
|
||||
raw = raw.decode(encoding, 'replace')
|
||||
except LookupError:
|
||||
raw = raw.decode('utf-8', 'replace')
|
||||
if strip_encoding_pats:
|
||||
raw = strip_encoding_declarations(raw)
|
||||
if resolve_entities:
|
||||
from calibre import entity_to_unicode
|
||||
from functools import partial
|
||||
f = partial(entity_to_unicode, exceptions=['amp', 'apos', 'quot', 'lt', 'gt'])
|
||||
raw = ENTITY_PATTERN.sub(f, raw)
|
||||
|
||||
raw = substitute_entites(raw)
|
||||
|
||||
return raw, encoding
|
||||
|
@ -273,7 +273,12 @@ def traverse(path_to_html_file, max_levels=sys.maxint, verbose=0, encoding=None)
|
||||
hf.links.remove(link)
|
||||
|
||||
next_level = list(nl)
|
||||
return flat, list(depth_first(flat[0], flat))
|
||||
orec = sys.getrecursionlimit()
|
||||
sys.setrecursionlimit(500000)
|
||||
try:
|
||||
return flat, list(depth_first(flat[0], flat))
|
||||
finally:
|
||||
sys.setrecursionlimit(orec)
|
||||
|
||||
|
||||
def opf_traverse(opf_reader, verbose=0, encoding=None):
|
||||
|
@ -73,7 +73,9 @@ class LrsParser(object):
|
||||
return CharButton(self.parsed_objects[tag.get('refobj')], None)
|
||||
if tag.name == 'plot':
|
||||
return Plot(self.parsed_objects[tag.get('refobj')], **self.attrs_to_dict(tag, ['refobj']))
|
||||
return map[tag.name](**self.attrs_to_dict(tag))
|
||||
settings = self.attrs_to_dict(tag)
|
||||
settings.pop('spanstyle', '')
|
||||
return map[tag.name](**settings)
|
||||
|
||||
def process_text_element(self, tag, elem):
|
||||
for item in tag.contents:
|
||||
@ -121,7 +123,8 @@ class LrsParser(object):
|
||||
for tag in self.soup.findAll('page'):
|
||||
page = self.parsed_objects[tag.get('objid')]
|
||||
self.book.append(page)
|
||||
for block_tag in tag.findAll(['canvas', 'imageblock', 'textblock', 'ruledline']):
|
||||
for block_tag in tag.findAll(['canvas', 'imageblock', 'textblock',
|
||||
'ruledline', 'simpletextblock']):
|
||||
if block_tag.name == 'ruledline':
|
||||
page.append(RuledLine(**self.attrs_to_dict(block_tag)))
|
||||
else:
|
||||
@ -134,7 +137,7 @@ class LrsParser(object):
|
||||
self.book.append(jb)
|
||||
self.parsed_objects[tag.get('objid')] = jb
|
||||
|
||||
for tag in self.soup.findAll('textblock'):
|
||||
for tag in self.soup.findAll(['textblock', 'simpletextblock']):
|
||||
self.process_text_block(tag)
|
||||
toc = self.soup.find('toc')
|
||||
if toc:
|
||||
@ -145,8 +148,10 @@ class LrsParser(object):
|
||||
|
||||
def third_pass(self):
|
||||
map = {
|
||||
'page' : (Page, ['pagestyle', 'evenfooterid', 'oddfooterid', 'evenheaderid', 'oddheaderid']),
|
||||
'page' : (Page, ['pagestyle', 'evenfooterid',
|
||||
'oddfooterid', 'evenheaderid', 'oddheaderid']),
|
||||
'textblock' : (TextBlock, ['textstyle', 'blockstyle']),
|
||||
'simpletextblock' : (TextBlock, ['textstyle', 'blockstyle']),
|
||||
'imageblock' : (ImageBlock, ['blockstyle', 'refstream']),
|
||||
'image' : (Image, ['refstream']),
|
||||
'canvas' : (Canvas, ['canvaswidth', 'canvasheight']),
|
||||
@ -160,8 +165,12 @@ class LrsParser(object):
|
||||
if tag.name in map.keys():
|
||||
settings = self.attrs_to_dict(tag, map[tag.name][1]+['objid', 'objlabel'])
|
||||
for a in ('pagestyle', 'blockstyle', 'textstyle'):
|
||||
if tag.has_key(a):
|
||||
settings[attrmap[a]] = self.parsed_objects[tag.get(a)]
|
||||
label = tag.get(a, False)
|
||||
if label:
|
||||
_obj = self.parsed_objects[label] if \
|
||||
self.parsed_objects.has_key(label) else \
|
||||
self._style_labels[label]
|
||||
settings[attrmap[a]] = _obj
|
||||
for a in ('evenfooterid', 'oddfooterid', 'evenheaderid', 'oddheaderid'):
|
||||
if tag.has_key(a):
|
||||
settings[a.replace('id', '')] = self.parsed_objects[tag.get(a)]
|
||||
@ -182,6 +191,7 @@ class LrsParser(object):
|
||||
'imagestream': (ImageStream, ['imagestreamlabel']),
|
||||
'registfont' : (Font, [])
|
||||
}
|
||||
self._style_labels = {}
|
||||
for id, tag in self.objects.items():
|
||||
if tag.name in map.keys():
|
||||
settings = self.attrs_to_dict(tag, map[tag.name][1]+['objid'])
|
||||
@ -189,7 +199,11 @@ class LrsParser(object):
|
||||
for a in ('evenheaderid', 'oddheaderid', 'evenfooterid', 'oddfooterid'):
|
||||
if tag.has_key(a):
|
||||
settings[a.replace('id', '')] = self.parsed_objects[tag.get(a)]
|
||||
settings.pop('autoindex', '')
|
||||
self.parsed_objects[id] = map[tag.name][0](**settings)
|
||||
x = tag.get('stylelabel', False)
|
||||
if x:
|
||||
self._style_labels[x] = self.parsed_objects[id]
|
||||
if tag.name == 'registfont':
|
||||
self.book.append(self.parsed_objects[id])
|
||||
|
||||
@ -220,6 +234,8 @@ class LrsParser(object):
|
||||
|
||||
def me(base, tagname):
|
||||
tag = base.find(tagname.lower())
|
||||
if tag is None:
|
||||
return ('', '', '')
|
||||
tag = (self.tag_to_string(tag), tag.get('reading') if tag.has_key('reading') else '')
|
||||
return tag
|
||||
|
||||
|
@ -80,6 +80,7 @@ class MetadataUpdater(object):
|
||||
type = self.type = data[60:68]
|
||||
self.nrecs, = unpack('>H', data[76:78])
|
||||
record0 = self.record0 = self.record(0)
|
||||
self.encryption_type, = unpack('>H', record0[12:14])
|
||||
codepage, = unpack('>I', record0[28:32])
|
||||
self.codec = 'utf-8' if codepage == 65001 else 'cp1252'
|
||||
image_base, = unpack('>I', record0[108:112])
|
||||
@ -133,6 +134,8 @@ class MetadataUpdater(object):
|
||||
if self.thumbnail_record is not None:
|
||||
recs.append((202, pack('>I', self.thumbnail_rindex)))
|
||||
exth = StringIO()
|
||||
if getattr(self, 'encryption_type', -1) != 0:
|
||||
raise MobiError('Setting metadata in DRMed MOBI files is not supported.')
|
||||
for code, data in recs:
|
||||
exth.write(pack('>II', code, len(data) + 8))
|
||||
exth.write(data)
|
||||
|
@ -5,7 +5,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
Read data from .mobi files
|
||||
'''
|
||||
|
||||
import sys, struct, os, cStringIO, re
|
||||
import sys, struct, os, cStringIO, re, functools
|
||||
|
||||
try:
|
||||
from PIL import Image as PILImage
|
||||
@ -186,7 +186,9 @@ class MobiReader(object):
|
||||
self.processed_html = self.processed_html.decode(self.book_header.codec, 'ignore')
|
||||
for pat in ENCODING_PATS:
|
||||
self.processed_html = pat.sub('', self.processed_html)
|
||||
self.processed_html = re.sub(r'&(\S+?);', entity_to_unicode,
|
||||
e2u = functools.partial(entity_to_unicode,
|
||||
exceptions=['lt', 'gt', 'amp', 'apos', 'quot'])
|
||||
self.processed_html = re.sub(r'&(\S+?);', e2u,
|
||||
self.processed_html)
|
||||
self.extract_images(processed_records, output_dir)
|
||||
self.replace_page_breaks()
|
||||
@ -235,7 +237,7 @@ class MobiReader(object):
|
||||
if self.verbose:
|
||||
print 'Creating OPF...'
|
||||
ncx = cStringIO.StringIO()
|
||||
opf = self.create_opf(htmlfile, guide)
|
||||
opf = self.create_opf(htmlfile, guide, root)
|
||||
opf.render(open(os.path.splitext(htmlfile)[0]+'.opf', 'wb'), ncx)
|
||||
ncx = ncx.getvalue()
|
||||
if ncx:
|
||||
@ -328,7 +330,7 @@ class MobiReader(object):
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
def create_opf(self, htmlfile, guide=None):
|
||||
def create_opf(self, htmlfile, guide=None, root=None):
|
||||
mi = self.book_header.exth.mi
|
||||
opf = OPFCreator(os.path.dirname(htmlfile), mi)
|
||||
if hasattr(self.book_header.exth, 'cover_offset'):
|
||||
@ -347,21 +349,27 @@ class MobiReader(object):
|
||||
if ref.type.lower() == 'toc':
|
||||
toc = ref.href()
|
||||
if toc:
|
||||
index = self.processed_html.find('<a id="%s" name="%s"'%(toc.partition('#')[-1], toc.partition('#')[-1]))
|
||||
elems = root.xpath('//*[@id="%s"]'%toc.partition('#')[-1])
|
||||
tocobj = None
|
||||
ent_pat = re.compile(r'&(\S+?);')
|
||||
if index > -1:
|
||||
raw = '<html><body>'+self.processed_html[index:]
|
||||
root = html.fromstring(raw)
|
||||
if elems:
|
||||
tocobj = TOC()
|
||||
for a in root.xpath('//a[@href]'):
|
||||
try:
|
||||
text = u' '.join([t.strip() for t in a.xpath('descendant::text()')])
|
||||
except:
|
||||
text = ''
|
||||
text = ent_pat.sub(entity_to_unicode, text)
|
||||
if a.get('href', '').startswith('#'):
|
||||
tocobj.add_item(toc.partition('#')[0], a.attrib['href'][1:], text)
|
||||
reached = False
|
||||
for x in root.iter():
|
||||
if x == elems[-1]:
|
||||
reached = True
|
||||
continue
|
||||
if reached and x.tag == 'a':
|
||||
href = x.get('href', '')
|
||||
if href:
|
||||
try:
|
||||
text = u' '.join([t.strip() for t in \
|
||||
x.xpath('descendant::text()')])
|
||||
except:
|
||||
text = ''
|
||||
text = ent_pat.sub(entity_to_unicode, text)
|
||||
tocobj.add_item(toc.partition('#')[0], href[1:],
|
||||
text)
|
||||
if tocobj is not None:
|
||||
opf.set_toc(tocobj)
|
||||
|
||||
|
@ -180,12 +180,12 @@ class ConfigDialog(QDialog, Ui_Dialog):
|
||||
self.toolbar_button_size.setCurrentIndex(0 if icons == self.ICON_SIZES[0] else 1 if icons == self.ICON_SIZES[1] else 2)
|
||||
self.show_toolbar_text.setChecked(config['show_text_in_toolbar'])
|
||||
|
||||
book_exts = sorted(BOOK_EXTENSIONS)
|
||||
for ext in book_exts:
|
||||
self.book_exts = sorted(BOOK_EXTENSIONS)
|
||||
for ext in self.book_exts:
|
||||
self.single_format.addItem(ext.upper(), QVariant(ext))
|
||||
|
||||
single_format = config['save_to_disk_single_format']
|
||||
self.single_format.setCurrentIndex(book_exts.index(single_format))
|
||||
self.single_format.setCurrentIndex(self.book_exts.index(single_format))
|
||||
self.cover_browse.setValue(config['cover_flow_queue_length'])
|
||||
self.systray_notifications.setChecked(not config['disable_tray_notification'])
|
||||
from calibre.translations.compiled import translations
|
||||
@ -204,7 +204,7 @@ class ConfigDialog(QDialog, Ui_Dialog):
|
||||
self.pdf_metadata.setChecked(prefs['read_file_metadata'])
|
||||
|
||||
added_html = False
|
||||
for ext in book_exts:
|
||||
for ext in self.book_exts:
|
||||
ext = ext.lower()
|
||||
ext = re.sub(r'(x{0,1})htm(l{0,1})', 'html', ext)
|
||||
if ext == 'lrf' or is_supported('book.'+ext):
|
||||
@ -402,7 +402,7 @@ class ConfigDialog(QDialog, Ui_Dialog):
|
||||
p = {0:'normal', 1:'high', 2:'low'}[self.priority.currentIndex()]
|
||||
prefs['worker_process_priority'] = p
|
||||
prefs['read_file_metadata'] = bool(self.pdf_metadata.isChecked())
|
||||
config['save_to_disk_single_format'] = BOOK_EXTENSIONS[self.single_format.currentIndex()]
|
||||
config['save_to_disk_single_format'] = self.book_exts[self.single_format.currentIndex()]
|
||||
config['cover_flow_queue_length'] = self.cover_browse.value()
|
||||
prefs['language'] = str(self.language.itemData(self.language.currentIndex()).toString())
|
||||
config['systray_icon'] = self.systray_icon.checkState() == Qt.Checked
|
||||
|
@ -126,7 +126,8 @@ class Config(ResizableDialog, Ui_Dialog):
|
||||
pix = QPixmap()
|
||||
pix.loadFromData(cover)
|
||||
if pix.isNull():
|
||||
d = error_dialog(self.window, _file + _(" is not a valid picture"))
|
||||
d = error_dialog(self.window, _('Error reading file'),
|
||||
_file + _(" is not a valid picture"))
|
||||
d.exec_()
|
||||
else:
|
||||
self.cover_path.setText(_file)
|
||||
|
@ -255,7 +255,7 @@ class LRFSingleDialog(QDialog, Ui_LRFSingleDialog):
|
||||
self.gui_headerformat.setDisabled(True)
|
||||
self.gui_header_separation.setDisabled(True)
|
||||
self.gui_use_metadata_cover.setCheckState(Qt.Checked)
|
||||
self.preprocess.addItem('No preprocessing')
|
||||
self.preprocess.addItem(_('No preprocessing'))
|
||||
for opt in self.PREPROCESS_OPTIONS:
|
||||
self.preprocess.addItem(opt.get_opt_string()[2:])
|
||||
ph = _('Preprocess the file before converting to LRF. This is useful if you know that the file is from a specific source. Known sources:')
|
||||
@ -338,7 +338,7 @@ class LRFSingleDialog(QDialog, Ui_LRFSingleDialog):
|
||||
cmd.append(opt)
|
||||
|
||||
text = qstring_to_unicode(self.preprocess.currentText())
|
||||
if text != 'No preprocessing':
|
||||
if text != _('No preprocessing'):
|
||||
cmd.append(u'--'+text)
|
||||
cmd.extend([u'--profile', qstring_to_unicode(self.gui_profile.currentText())])
|
||||
|
||||
|
@ -19,5 +19,4 @@ class Config(_Config):
|
||||
self.opt_dont_split_on_page_breaks.setVisible(False)
|
||||
self.opt_preserve_tag_structure.setVisible(False)
|
||||
self.opt_linearize_tables.setVisible(False)
|
||||
self.opt_no_justification.setVisible(False)
|
||||
self.page_map_box.setVisible(False)
|
BIN
src/calibre/gui2/images/news/soldiers.png
Normal file
BIN
src/calibre/gui2/images/news/soldiers.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 455 B |
BIN
src/calibre/gui2/images/news/theonion.png
Normal file
BIN
src/calibre/gui2/images/news/theonion.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 804 B |
@ -194,7 +194,6 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
|
||||
def __init__(self, pathtoebook=None):
|
||||
MainWindow.__init__(self, None)
|
||||
self.setupUi(self)
|
||||
|
||||
self.iterator = None
|
||||
self.current_page = None
|
||||
self.pending_search = None
|
||||
@ -619,7 +618,7 @@ View an ebook.
|
||||
def main(args=sys.argv):
|
||||
parser = option_parser()
|
||||
args = parser.parse_args(args)[-1]
|
||||
pid = os.fork() if islinux else -1
|
||||
pid = os.fork() if False and islinux else -1
|
||||
if pid <= 0:
|
||||
app = Application(args)
|
||||
app.setWindowIcon(QIcon(':/images/viewer.svg'))
|
||||
|
@ -19,7 +19,7 @@ except:
|
||||
send_message = None
|
||||
from calibre.ebooks.metadata.meta import get_metadata
|
||||
from calibre.library.database2 import LibraryDatabase2
|
||||
from calibre.ebooks.metadata.opf import OPFCreator, OPFReader
|
||||
from calibre.ebooks.metadata.opf2 import OPFCreator, OPF
|
||||
from calibre.utils.genshi.template import MarkupTemplate
|
||||
|
||||
FIELDS = set(['title', 'authors', 'author_sort', 'publisher', 'rating', 'timestamp', 'size', 'tags', 'comments', 'series', 'series_index', 'formats', 'isbn', 'cover'])
|
||||
@ -453,7 +453,7 @@ id is an id number from the list command.
|
||||
return 0
|
||||
|
||||
def do_set_metadata(db, id, stream):
|
||||
mi = OPFReader(stream)
|
||||
mi = OPF(stream)
|
||||
db.set_metadata(id, mi)
|
||||
do_show_metadata(db, id, False)
|
||||
if send_message is not None:
|
||||
|
@ -435,17 +435,16 @@ def post_install():
|
||||
parser = option_parser()
|
||||
opts = parser.parse_args()[0]
|
||||
|
||||
if not opts.no_root and os.geteuid() != 0:
|
||||
print >> sys.stderr, 'You must be root to run this command.'
|
||||
sys.exit(1)
|
||||
|
||||
global use_destdir
|
||||
use_destdir = opts.destdir
|
||||
manifest = []
|
||||
manifest += setup_udev_rules(opts.group_file, not opts.dont_reload, opts.fatal_errors)
|
||||
manifest += setup_completion(opts.fatal_errors)
|
||||
setup_desktop_integration(opts.fatal_errors)
|
||||
manifest += install_man_pages(opts.fatal_errors)
|
||||
if opts.no_root or os.geteuid() == 0:
|
||||
manifest += setup_udev_rules(opts.group_file, not opts.dont_reload, opts.fatal_errors)
|
||||
manifest += setup_completion(opts.fatal_errors)
|
||||
manifest += install_man_pages(opts.fatal_errors)
|
||||
else:
|
||||
print "Skipping udev, completion, and man-page install for non-root user."
|
||||
|
||||
try:
|
||||
from PyQt4 import Qt
|
||||
|
@ -15,7 +15,7 @@ DEPENDENCIES = [
|
||||
('ImageMagick', '6.3.5', 'imagemagick', 'imagemagick', 'ImageMagick'),
|
||||
('xdg-utils', '1.0.2', 'xdg-utils', 'xdg-utils', 'xdg-utils'),
|
||||
('dbus-python', '0.82.2', 'dbus-python', 'python-dbus', 'dbus-python'),
|
||||
('lxml', '2.0.5', 'lxml', 'python-lxml', 'python-lxml'),
|
||||
('lxml', '2.1.5', 'lxml', 'python-lxml', 'python-lxml'),
|
||||
('python-dateutil', '1.4.1', 'python-dateutil', 'python-dateutil', 'python-dateutil'),
|
||||
('BeautifulSoup', '3.0.5', 'beautifulsoup', 'python-beautifulsoup', 'python-BeautifulSoup'),
|
||||
('help2man', '1.36.4', 'help2man', 'help2man', 'help2man'),
|
||||
|
@ -32,6 +32,7 @@ recipe_modules = ['recipe_' + r for r in (
|
||||
'hindu', 'cincinnati_enquirer', 'physics_world', 'pressonline',
|
||||
'la_republica', 'physics_today', 'chicago_tribune', 'e_novine',
|
||||
'al_jazeera', 'winsupersite', 'borba', 'courrierinternational',
|
||||
'lamujerdemivida', 'soldiers', 'theonion',
|
||||
)]
|
||||
|
||||
import re, imp, inspect, time, os
|
||||
|
76
src/calibre/web/feeds/recipes/recipe_lamujerdemivida.py
Normal file
76
src/calibre/web/feeds/recipes/recipe_lamujerdemivida.py
Normal file
@ -0,0 +1,76 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
lamujerdemivida.com.ar
|
||||
'''
|
||||
from calibre import strftime
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class LaMujerDeMiVida(BasicNewsRecipe):
    # Recipe for lamujerdemivida.com.ar. The article list is built in
    # parse_index() by scanning the HTML of every configured feed URL.

    # -- descriptive metadata ------------------------------------------
    title                 = 'La Mujer de mi Vida'
    __author__            = 'Darko Miletic'
    description           = 'Cultura de otra manera'
    publisher             = 'La Mujer de mi Vida'
    category              = 'literatura, critica, arte, ensayos'
    language              = _('Spanish')

    # -- download / parsing settings -----------------------------------
    oldest_article        = 90
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'cp1252'
    INDEX                 = 'http://www.lamujerdemivida.com.ar/'

    # -- converter options (built from the metadata above) -------------
    html2lrf_options = [
        '--comment', description,
        '--category', category,
        '--publisher', publisher,
        '--ignore-tables',
    ]

    html2epub_options = (
        'publisher="' + publisher
        + '"\ncomments="' + description
        + '"\ntags="' + category
        + '"\nlinearize_tables=True'
    )

    keep_only_tags = [{'name': 'table', 'attrs': {'width':'570'}}]

    feeds = [(u'Articulos', u'http://www.lamujerdemivida.com.ar/index.php')]

    def preprocess_html(self, soup):
        # Tag the document as es-AR, then drop every inline style attribute.
        soup.html['xml:lang'] = 'es-AR'
        soup.html['lang'] = 'es-AR'
        content_language = '<meta http-equiv="Content-Language" content="es-AR"/>'
        soup.head.insert(0, content_language)
        for styled in soup.findAll(style=True):
            del styled['style']
        return soup

    def get_cover_url(self):
        # Returns INDEX + the 'src' of the image whose alt text is
        # 'Lamujerdemivida.', or None when the index page has no such image.
        index_soup = self.index_to_soup(self.INDEX)
        image = index_soup.find('img', attrs={'alt':'Lamujerdemivida.'})
        if not image:
            return None
        return self.INDEX + image['src']

    def parse_index(self):
        # Returns a list of (feed title, articles) pairs, one per entry of
        # get_feeds(). Each article is a dict with the keys 'title', 'date',
        # 'url' and 'description'.
        collected = []
        for feedtitle, feedurl in self.get_feeds():
            shown_name = feedtitle if feedtitle else feedurl
            self.report_progress(0, _('Fetching feed')+' %s...'%shown_name)
            page = self.index_to_soup(feedurl)
            entries = []
            # Links are looked up inside the table cells of width 390.
            for cell in page.findAll('td', attrs={'width':'390'}):
                link = cell.find('a', href=True)
                if not link:
                    continue
                target = link['href']
                heading = self.tag_to_string(link)
                # Every entry is stamped with the current time.
                stamp = strftime(self.timefmt)
                entries.append({
                    'title'      : heading,
                    'date'       : stamp,
                    'url'        : target,
                    'description': '',
                })
            collected.append((feedtitle, entries))
        return collected
|
||||
|
57
src/calibre/web/feeds/recipes/recipe_soldiers.py
Normal file
57
src/calibre/web/feeds/recipes/recipe_soldiers.py
Normal file
@ -0,0 +1,57 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
www.army.mil/soldiers/
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Soldiers(BasicNewsRecipe):
    # Recipe for Soldiers magazine (www.army.mil/soldiers/), driven by the
    # front-page RSS feed listed in `feeds`.

    # -- descriptive metadata ------------------------------------------
    title                  = 'Soldiers'
    __author__             = 'Darko Miletic'
    description            = 'The Official U.S. Army Magazine'
    publisher              = 'U.S. Army'
    category               = 'news, politics, war, weapons'
    language               = _('English')

    # -- download / parsing settings -----------------------------------
    oldest_article         = 30
    max_articles_per_feed  = 100
    no_stylesheets         = True
    use_embedded_content   = False
    remove_javascript      = True
    encoding               = 'utf-8'
    INDEX                  = 'http://www.army.mil/soldiers/'

    # One download at a time, with a delay value of 4.
    simultaneous_downloads = 1
    max_connections        = 1
    delay                  = 4

    # -- converter options (built from the metadata above) -------------
    html2lrf_options = [
        '--comment', description,
        '--category', category,
        '--publisher', publisher,
    ]

    html2epub_options = (
        'publisher="' + publisher
        + '"\ncomments="' + description
        + '"\ntags="' + category + '"'
    )

    keep_only_tags = [{'name': 'div', 'attrs': {'id':'rightCol'}}]

    remove_tags = [
        {'name': 'div', 'attrs': {'id':['addThis','comment','articleFooter']}},
        {'name': ['object','link']},
    ]

    feeds = [(u'Frontpage', u'http://www.army.mil/rss/feeds/soldiersfrontpage.xml' )]

    def preprocess_html(self, soup):
        # Drop every inline style attribute and hand the same soup back.
        for styled in soup.findAll(style=True):
            del styled['style']
        return soup

    def get_cover_url(self):
        # Returns the 'src' of the image whose alt text is
        # 'Current Magazine Cover', or None when the index page has none.
        index_soup = self.index_to_soup(self.INDEX)
        image = index_soup.find('img', attrs={'alt':'Current Magazine Cover'})
        if not image:
            return None
        return image['src']
|
45
src/calibre/web/feeds/recipes/recipe_theonion.py
Normal file
45
src/calibre/web/feeds/recipes/recipe_theonion.py
Normal file
@ -0,0 +1,45 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
|
||||
'''
|
||||
theonion.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class TheOnion(BasicNewsRecipe):
    # Recipe for theonion.com. Purely declarative: it only sets
    # BasicNewsRecipe options and the two feeds to download.

    # -- descriptive metadata ------------------------------------------
    title                 = 'The Onion'
    __author__            = 'Darko Miletic'
    description           = "America's finest news source"
    publisher             = u'Onion, Inc.'
    category              = u'humor, news, USA'
    language              = _('English')

    # -- download / parsing settings -----------------------------------
    oldest_article        = 2
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'utf-8'
    remove_javascript     = True

    # -- converter options (built from the metadata above) -------------
    html2epub_options = (
        'publisher="' + publisher
        + '"\ncomments="' + description
        + '"\ntags="' + category + '"'
    )

    html2lrf_options = [
        '--comment', description,
        '--category', category,
        '--publisher', publisher,
    ]

    keep_only_tags = [{'name': 'div', 'attrs': {'id':'main'}}]

    remove_tags = [
        {'name': ['object','link','iframe','base']},
        {'name': 'div', 'attrs': {'class':['toolbar_side','graphical_feature','toolbar_bottom']}},
        {'name': 'div', 'attrs': {'id':['recent_slider','sidebar','pagination','related_media']}},
    ]

    feeds = [
        (u'Daily' , u'http://feeds.theonion.com/theonion/daily' ),
        (u'Sports' , u'http://feeds.theonion.com/theonion/sports' ),
    ]
|
Loading…
x
Reference in New Issue
Block a user