mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Implement a check setup command that uses PyFlakes to check for various errors
This commit is contained in:
parent
792c6b0b22
commit
f9ff180347
@ -1,5 +1,5 @@
|
|||||||
*_ui.py
|
*_ui.py
|
||||||
moc_*.cpp
|
.check-cache.pickle
|
||||||
src/calibre/plugins
|
src/calibre/plugins
|
||||||
resources/images.qrc
|
resources/images.qrc
|
||||||
src/calibre/manual/.build/
|
src/calibre/manual/.build/
|
||||||
|
3
setup.py
3
setup.py
@ -6,7 +6,6 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
|
||||||
import sys, os, optparse
|
import sys, os, optparse
|
||||||
|
|
||||||
sys.path.insert(0, os.path.abspath(os.path.dirname(__file__)))
|
sys.path.insert(0, os.path.abspath(os.path.dirname(__file__)))
|
||||||
@ -70,7 +69,7 @@ def main(args=sys.argv):
|
|||||||
command.clean()
|
command.clean()
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
if opts.clean_all():
|
if opts.clean_all:
|
||||||
for cmd in commands.__all__:
|
for cmd in commands.__all__:
|
||||||
prints('Cleaning', cmd)
|
prints('Cleaning', cmd)
|
||||||
getattr(commands, cmd).clean()
|
getattr(commands, cmd).clean()
|
||||||
|
75
setup/check.py
Normal file
75
setup/check.py
Normal file
@ -0,0 +1,75 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||||
|
from __future__ import with_statement
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import sys, os, cPickle, subprocess
|
||||||
|
from operator import attrgetter
|
||||||
|
from setup import Command
|
||||||
|
|
||||||
|
def check_for_python_errors(filename, builtins):
    '''
    Run the PyFlakes checker over a single python source file.

    :param filename: Path of the file to check.
    :param builtins: Passed through to the PyFlakes ``Checker`` as its
                     ``builtins`` argument.
    :return: A one element list holding a :class:`SyntaxError` when the file
             cannot be parsed (the traceback is also printed), otherwise the
             checker's messages sorted by line number.
    '''
    from pyflakes import checker, ast

    # Close the file promptly instead of leaving it to garbage collection
    with open(filename, 'rb') as src:
        contents = src.read()

    try:
        tree = ast.parse(contents, filename)
    except Exception:
        # Deliberately not a bare except: KeyboardInterrupt and SystemExit
        # must propagate instead of being reported as a syntax error.
        import traceback
        traceback.print_exc()
        value = sys.exc_info()[1]
        try:
            # For a SyntaxError args[1] is (filename, lineno, offset, text)
            lineno, offset = value.args[1][1:3]
        except (IndexError, TypeError, ValueError):
            # Not a SyntaxError shaped exception, no location available
            lineno, offset = 1, 0
        return [SyntaxError(filename, lineno, offset, str(value))]

    w = checker.Checker(tree, filename, builtins = builtins)
    w.messages.sort(key = attrgetter('lineno'))
    return w.messages
|
||||||
|
|
||||||
|
|
||||||
|
class Check(Command):
    '''
    Setup command that runs :func:`check_for_python_errors` over the python
    files found under the ``calibre`` directory of ``self.SRC``.

    Files that pass are remembered, keyed by path with their modification
    time, in the pickle file named by ``CACHE`` so that unchanged files are
    not checked again. The run stops at the first file with problems.
    '''

    # Names handed to the checker as additional builtins
    BUILTINS = ['_', '__', 'dynamic_property', 'I', 'P']
    # Pickled {path: mtime} map of the files that already passed
    CACHE = '.check-cache.pickle'

    def run(self, opts):
        '''
        Check every changed python file, stopping at the first failure.

        On failure the problems are reported, the cache is saved, the file is
        opened in gvim (``subprocess.call`` waits for it to exit) and
        ``SystemExit(1)`` is raised.

        :param opts: Unused by this command.
        '''
        cache = {}
        if os.path.exists(self.CACHE):
            # NOTE(review): unpickling is only safe for trusted data; this
            # assumes the cache file was written by a previous run.
            with open(self.CACHE, 'rb') as src:
                cache = cPickle.load(src)
        for x in os.walk(self.j(self.SRC, 'calibre')):
            for f in x[-1]:
                f = self.j(x[0], f)
                if not f.endswith('.py') or self._is_excluded(f):
                    continue
                # Only stat files that are actually candidates for checking
                mtime = os.stat(f).st_mtime
                if cache.get(f, 0) == mtime:
                    continue
                self.info('\tChecking', f)
                w = check_for_python_errors(f, self.BUILTINS)
                if w:
                    self.report_errors(w)
                    # Keep the results for the files that passed so far
                    self._save_cache(cache)
                    subprocess.call(['gvim', '-f', f])
                    raise SystemExit(1)
                cache[f] = mtime
        self._save_cache(cache)

    def _is_excluded(self, f):
        '''Return True if the file at path ``f`` must never be checked.'''
        # Match against a '/' separated path so that the 'prs500/driver.py'
        # exclusion also works where os.sep is not '/'
        normalized = f.replace(os.sep, '/')
        return self.b(f) in ('ptempfile.py', 'feedparser.py',
                'pyparsing.py', 'markdown.py') or 'genshi' in f or \
                'prs500/driver.py' in normalized

    def _save_cache(self, cache):
        '''Write ``cache`` to ``CACHE``, closing the file afterwards.'''
        with open(self.CACHE, 'wb') as dest:
            cPickle.dump(cache, dest, -1)

    def report_errors(self, errors):
        '''
        Report each entry of ``errors`` via ``self.info``.

        :param errors: The list returned by :func:`check_for_python_errors`.
        '''
        for err in errors:
            if isinstance(err, SyntaxError):
                # Reported through self.info like every other message here
                self.info('\t\tSyntax Error')
            else:
                # A missing or None column/line number is shown as 0
                col = getattr(err, 'col', 0) or 0
                lineno = err.lineno or 0
                self.info('\t\t%d:%d:'%(lineno, col),
                        err.message%err.message_args)
|
||||||
|
|
@ -11,6 +11,7 @@ __all__ = [
|
|||||||
'build',
|
'build',
|
||||||
'gui',
|
'gui',
|
||||||
'develop',
|
'develop',
|
||||||
|
'check',
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
@ -29,6 +30,8 @@ develop = Develop()
|
|||||||
from setup.gui import GUI
|
from setup.gui import GUI
|
||||||
gui = GUI()
|
gui = GUI()
|
||||||
|
|
||||||
|
from setup.check import Check
|
||||||
|
check = Check()
|
||||||
|
|
||||||
commands = {}
|
commands = {}
|
||||||
for x in __all__:
|
for x in __all__:
|
||||||
|
@ -78,9 +78,10 @@ class GUI(Command):
|
|||||||
dat = pat.sub(sub, dat)
|
dat = pat.sub(sub, dat)
|
||||||
|
|
||||||
if form.endswith('viewer%smain.ui'%os.sep):
|
if form.endswith('viewer%smain.ui'%os.sep):
|
||||||
self.inf('\t\tPromoting WebView')
|
self.info('\t\tPromoting WebView')
|
||||||
dat = dat.replace('self.view = QtWebKit.QWebView(', 'self.view = DocumentView(')
|
dat = dat.replace('self.view = QtWebKit.QWebView(', 'self.view = DocumentView(')
|
||||||
dat += '\n\nfrom calibre.gui2.viewer.documentview import DocumentView'
|
dat += '\n\nfrom calibre.gui2.viewer.documentview import DocumentView'
|
||||||
|
dat += '\nQtWebKit'
|
||||||
|
|
||||||
open(compiled_form, 'wb').write(dat)
|
open(compiled_form, 'wb').write(dat)
|
||||||
|
|
||||||
|
@ -21,6 +21,11 @@ from calibre.constants import iswindows, isosx, islinux, isfrozen, \
|
|||||||
filesystem_encoding
|
filesystem_encoding
|
||||||
import mechanize
|
import mechanize
|
||||||
|
|
||||||
|
if False:
|
||||||
|
winutil, winutilerror, __appname__, islinux, __version__
|
||||||
|
fcntl, win32event, isfrozen, __author__, terminal_controller
|
||||||
|
winerror, win32api
|
||||||
|
|
||||||
mimetypes.add_type('application/epub+zip', '.epub')
|
mimetypes.add_type('application/epub+zip', '.epub')
|
||||||
mimetypes.add_type('text/x-sony-bbeb+xml', '.lrs')
|
mimetypes.add_type('text/x-sony-bbeb+xml', '.lrs')
|
||||||
mimetypes.add_type('application/xhtml+xml', '.xhtml')
|
mimetypes.add_type('application/xhtml+xml', '.xhtml')
|
||||||
|
@ -13,19 +13,19 @@
|
|||||||
# modify it under the terms of the GNU Lesser General Public
|
# modify it under the terms of the GNU Lesser General Public
|
||||||
# License as published by the Free Software Foundation; either
|
# License as published by the Free Software Foundation; either
|
||||||
# version 2.1 of the License, or (at your option) any later version.
|
# version 2.1 of the License, or (at your option) any later version.
|
||||||
#
|
#
|
||||||
# This library is distributed in the hope that it will be useful,
|
# This library is distributed in the hope that it will be useful,
|
||||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
# Lesser General Public License for more details.
|
# Lesser General Public License for more details.
|
||||||
#
|
#
|
||||||
# You should have received a copy of the GNU Lesser General Public
|
# You should have received a copy of the GNU Lesser General Public
|
||||||
# License along with this library; if not, write to the Free Software
|
# License along with this library; if not, write to the Free Software
|
||||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||||
# 02110-1301 USA
|
# 02110-1301 USA
|
||||||
######################### END LICENSE BLOCK #########################
|
######################### END LICENSE BLOCK #########################
|
||||||
|
|
||||||
from constants import eStart, eError, eItsMe
|
from constants import eStart
|
||||||
|
|
||||||
class CodingStateMachine:
|
class CodingStateMachine:
|
||||||
def __init__(self, sm):
|
def __init__(self, sm):
|
||||||
|
@ -13,19 +13,19 @@
|
|||||||
# modify it under the terms of the GNU Lesser General Public
|
# modify it under the terms of the GNU Lesser General Public
|
||||||
# License as published by the Free Software Foundation; either
|
# License as published by the Free Software Foundation; either
|
||||||
# version 2.1 of the License, or (at your option) any later version.
|
# version 2.1 of the License, or (at your option) any later version.
|
||||||
#
|
#
|
||||||
# This library is distributed in the hope that it will be useful,
|
# This library is distributed in the hope that it will be useful,
|
||||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
# Lesser General Public License for more details.
|
# Lesser General Public License for more details.
|
||||||
#
|
#
|
||||||
# You should have received a copy of the GNU Lesser General Public
|
# You should have received a copy of the GNU Lesser General Public
|
||||||
# License along with this library; if not, write to the Free Software
|
# License along with this library; if not, write to the Free Software
|
||||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||||
# 02110-1301 USA
|
# 02110-1301 USA
|
||||||
######################### END LICENSE BLOCK #########################
|
######################### END LICENSE BLOCK #########################
|
||||||
|
|
||||||
import constants, sys
|
import constants
|
||||||
from escsm import HZSMModel, ISO2022CNSMModel, ISO2022JPSMModel, ISO2022KRSMModel
|
from escsm import HZSMModel, ISO2022CNSMModel, ISO2022JPSMModel, ISO2022KRSMModel
|
||||||
from charsetprober import CharSetProber
|
from charsetprober import CharSetProber
|
||||||
from codingstatemachine import CodingStateMachine
|
from codingstatemachine import CodingStateMachine
|
||||||
@ -75,5 +75,5 @@ class EscCharSetProber(CharSetProber):
|
|||||||
self._mState = constants.eFoundIt
|
self._mState = constants.eFoundIt
|
||||||
self._mDetectedCharset = codingSM.get_coding_state_machine()
|
self._mDetectedCharset = codingSM.get_coding_state_machine()
|
||||||
return self.get_state()
|
return self.get_state()
|
||||||
|
|
||||||
return self.get_state()
|
return self.get_state()
|
||||||
|
@ -14,19 +14,19 @@
|
|||||||
# modify it under the terms of the GNU Lesser General Public
|
# modify it under the terms of the GNU Lesser General Public
|
||||||
# License as published by the Free Software Foundation; either
|
# License as published by the Free Software Foundation; either
|
||||||
# version 2.1 of the License, or (at your option) any later version.
|
# version 2.1 of the License, or (at your option) any later version.
|
||||||
#
|
#
|
||||||
# This library is distributed in the hope that it will be useful,
|
# This library is distributed in the hope that it will be useful,
|
||||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
# Lesser General Public License for more details.
|
# Lesser General Public License for more details.
|
||||||
#
|
#
|
||||||
# You should have received a copy of the GNU Lesser General Public
|
# You should have received a copy of the GNU Lesser General Public
|
||||||
# License along with this library; if not, write to the Free Software
|
# License along with this library; if not, write to the Free Software
|
||||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||||
# 02110-1301 USA
|
# 02110-1301 USA
|
||||||
######################### END LICENSE BLOCK #########################
|
######################### END LICENSE BLOCK #########################
|
||||||
|
|
||||||
import constants, sys
|
import constants
|
||||||
from charsetgroupprober import CharSetGroupProber
|
from charsetgroupprober import CharSetGroupProber
|
||||||
from sbcharsetprober import SingleByteCharSetProber
|
from sbcharsetprober import SingleByteCharSetProber
|
||||||
from langcyrillicmodel import Win1251CyrillicModel, Koi8rModel, Latin5CyrillicModel, MacCyrillicModel, Ibm866Model, Ibm855Model
|
from langcyrillicmodel import Win1251CyrillicModel, Koi8rModel, Latin5CyrillicModel, MacCyrillicModel, Ibm866Model, Ibm855Model
|
||||||
|
@ -13,19 +13,19 @@
|
|||||||
# modify it under the terms of the GNU Lesser General Public
|
# modify it under the terms of the GNU Lesser General Public
|
||||||
# License as published by the Free Software Foundation; either
|
# License as published by the Free Software Foundation; either
|
||||||
# version 2.1 of the License, or (at your option) any later version.
|
# version 2.1 of the License, or (at your option) any later version.
|
||||||
#
|
#
|
||||||
# This library is distributed in the hope that it will be useful,
|
# This library is distributed in the hope that it will be useful,
|
||||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
# Lesser General Public License for more details.
|
# Lesser General Public License for more details.
|
||||||
#
|
#
|
||||||
# You should have received a copy of the GNU Lesser General Public
|
# You should have received a copy of the GNU Lesser General Public
|
||||||
# License along with this library; if not, write to the Free Software
|
# License along with this library; if not, write to the Free Software
|
||||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||||
# 02110-1301 USA
|
# 02110-1301 USA
|
||||||
######################### END LICENSE BLOCK #########################
|
######################### END LICENSE BLOCK #########################
|
||||||
|
|
||||||
import constants, sys
|
import constants
|
||||||
from constants import eStart, eError, eItsMe
|
from constants import eStart, eError, eItsMe
|
||||||
from charsetprober import CharSetProber
|
from charsetprober import CharSetProber
|
||||||
from codingstatemachine import CodingStateMachine
|
from codingstatemachine import CodingStateMachine
|
||||||
|
@ -8,11 +8,10 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
|
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import os, re
|
import re
|
||||||
from itertools import count, chain
|
from itertools import count
|
||||||
from calibre.ebooks.oeb.base import XHTML, XHTML_NS
|
from calibre.ebooks.oeb.base import XHTML_NS
|
||||||
from calibre.ebooks.oeb.base import OEBBook
|
from calibre.ebooks.oeb.base import OEBBook
|
||||||
from lxml import etree, html
|
|
||||||
from lxml.etree import XPath
|
from lxml.etree import XPath
|
||||||
|
|
||||||
NSMAP = {'h': XHTML_NS, 'html': XHTML_NS, 'xhtml': XHTML_NS}
|
NSMAP = {'h': XHTML_NS, 'html': XHTML_NS, 'xhtml': XHTML_NS}
|
||||||
@ -55,5 +54,5 @@ def add_page_map(opfpath, opts):
|
|||||||
id = elem.attrib['id'] = idgen.next()
|
id = elem.attrib['id'] = idgen.next()
|
||||||
href = '#'.join((item.href, id))
|
href = '#'.join((item.href, id))
|
||||||
oeb.pages.add(name, href)
|
oeb.pages.add(name, href)
|
||||||
writer = DirWriter(version='2.0', page_map=True)
|
writer = None#DirWriter(version='2.0', page_map=True)
|
||||||
writer.dump(oeb, opfpath)
|
writer.dump(oeb, opfpath)
|
||||||
|
@ -6,7 +6,6 @@ from __future__ import with_statement
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
|
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
|
||||||
|
|
||||||
import sys
|
|
||||||
from calibre import plugins
|
from calibre import plugins
|
||||||
|
|
||||||
_lzx, _error = plugins['lzx']
|
_lzx, _error = plugins['lzx']
|
||||||
|
@ -7,3 +7,5 @@ Microsoft LIT tag and attribute tables.
|
|||||||
|
|
||||||
from calibre.ebooks.lit.maps.opf import MAP as OPF_MAP
|
from calibre.ebooks.lit.maps.opf import MAP as OPF_MAP
|
||||||
from calibre.ebooks.lit.maps.html import MAP as HTML_MAP
|
from calibre.ebooks.lit.maps.html import MAP as HTML_MAP
|
||||||
|
|
||||||
|
OPF_MAP, HTML_MAP
|
||||||
|
@ -1,14 +1,14 @@
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
import sys, os
|
import os
|
||||||
from calibre import iswindows
|
|
||||||
from calibre.ptempfile import PersistentTemporaryFile
|
from calibre.ptempfile import PersistentTemporaryFile
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from PIL import ImageFont
|
from PIL import ImageFont
|
||||||
|
ImageFont
|
||||||
except ImportError:
|
except ImportError:
|
||||||
import ImageFont
|
import ImageFont
|
||||||
|
|
||||||
'''
|
'''
|
||||||
Default fonts used in the PRS500
|
Default fonts used in the PRS500
|
||||||
'''
|
'''
|
||||||
@ -48,11 +48,11 @@ def get_font_path(name):
|
|||||||
# then, try calibre shipped ones
|
# then, try calibre shipped ones
|
||||||
try:
|
try:
|
||||||
try:
|
try:
|
||||||
font_mod = __import__('calibre.ebooks.lrf.fonts.prs500', {}, {},
|
font_mod = __import__('calibre.ebooks.lrf.fonts.prs500', {}, {},
|
||||||
[fname], -1)
|
[fname], -1)
|
||||||
getattr(font_mod, fname)
|
getattr(font_mod, fname)
|
||||||
except (ImportError, AttributeError):
|
except (ImportError, AttributeError):
|
||||||
font_mod = __import__('calibre.ebooks.lrf.fonts.liberation', {}, {},
|
font_mod = __import__('calibre.ebooks.lrf.fonts.liberation', {}, {},
|
||||||
[LIBERATION_FONT_MAP[name]], -1)
|
[LIBERATION_FONT_MAP[name]], -1)
|
||||||
p = PersistentTemporaryFile('.ttf', 'font_')
|
p = PersistentTemporaryFile('.ttf', 'font_')
|
||||||
p.write(getattr(font_mod, fname).font_data)
|
p.write(getattr(font_mod, fname).font_data)
|
||||||
@ -61,7 +61,7 @@ def get_font_path(name):
|
|||||||
return p.name
|
return p.name
|
||||||
except ImportError:
|
except ImportError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
# finally, try system default ones
|
# finally, try system default ones
|
||||||
if SYSTEM_FONT_MAP.has_key(name) and os.access(SYSTEM_FONT_MAP[name], os.R_OK):
|
if SYSTEM_FONT_MAP.has_key(name) and os.access(SYSTEM_FONT_MAP[name], os.R_OK):
|
||||||
return SYSTEM_FONT_MAP[name]
|
return SYSTEM_FONT_MAP[name]
|
||||||
@ -71,7 +71,7 @@ def get_font_path(name):
|
|||||||
|
|
||||||
def get_font(name, size, encoding='unic'):
|
def get_font(name, size, encoding='unic'):
|
||||||
'''
|
'''
|
||||||
Get an ImageFont object by name.
|
Get an ImageFont object by name.
|
||||||
@param size: Font height in pixels. To convert from pts:
|
@param size: Font height in pixels. To convert from pts:
|
||||||
sz in pixels = (dpi/72) * size in pts
|
sz in pixels = (dpi/72) * size in pts
|
||||||
@param encoding: Font encoding to use. E.g. 'unic', 'symbol', 'ADOB', 'ADBE', 'aprm'
|
@param encoding: Font encoding to use. E.g. 'unic', 'symbol', 'ADOB', 'ADBE', 'aprm'
|
||||||
|
@ -94,7 +94,7 @@ NAME_MAP = {
|
|||||||
u'springgreen': u'#00FF7F',
|
u'springgreen': u'#00FF7F',
|
||||||
u'violet': u'#EE82EE',
|
u'violet': u'#EE82EE',
|
||||||
u'yellowgreen': u'#9ACD32'
|
u'yellowgreen': u'#9ACD32'
|
||||||
}
|
}
|
||||||
|
|
||||||
hex_pat = re.compile('#(\d{2})(\d{2})(\d{2})')
|
hex_pat = re.compile('#(\d{2})(\d{2})(\d{2})')
|
||||||
rgb_pat = re.compile('rgb\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\)', re.IGNORECASE)
|
rgb_pat = re.compile('rgb\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\)', re.IGNORECASE)
|
||||||
@ -109,5 +109,5 @@ def lrs_color(html_color):
|
|||||||
if hcol in NAME_MAP:
|
if hcol in NAME_MAP:
|
||||||
return NAME_MAP[hcol].replace('#', '0x00')
|
return NAME_MAP[hcol].replace('#', '0x00')
|
||||||
return '0x00000000'
|
return '0x00000000'
|
||||||
|
|
||||||
|
|
||||||
|
@ -10,13 +10,13 @@ from calibre.ebooks.lrf.lrfparser import LRFDocument
|
|||||||
from calibre.ebooks.metadata.opf import OPFCreator
|
from calibre.ebooks.metadata.opf import OPFCreator
|
||||||
|
|
||||||
from calibre.ebooks.lrf.objects import PageAttr, BlockAttr, TextAttr
|
from calibre.ebooks.lrf.objects import PageAttr, BlockAttr, TextAttr
|
||||||
|
from calibre.ebooks.lrf.pylrs.pylrs import TextStyle
|
||||||
|
|
||||||
class BlockStyle(object):
|
class BlockStyle(object):
|
||||||
|
|
||||||
def __init__(self, ba):
|
def __init__(self, ba):
|
||||||
self.ba = ba
|
self.ba = ba
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
ans = '.'+str(self.ba.id)+' {\n'
|
ans = '.'+str(self.ba.id)+' {\n'
|
||||||
if hasattr(self.ba, 'sidemargin'):
|
if hasattr(self.ba, 'sidemargin'):
|
||||||
@ -37,10 +37,10 @@ class BlockStyle(object):
|
|||||||
ans += '\tbackground-color: %s;\n'%(self.ba.bgcolor.to_html())
|
ans += '\tbackground-color: %s;\n'%(self.ba.bgcolor.to_html())
|
||||||
#TODO: Fixed size blocks
|
#TODO: Fixed size blocks
|
||||||
return ans + '}\n'
|
return ans + '}\n'
|
||||||
|
|
||||||
|
|
||||||
class LRFConverter(object):
|
class LRFConverter(object):
|
||||||
|
|
||||||
def __init__(self, document, opts, logger):
|
def __init__(self, document, opts, logger):
|
||||||
self.lrf = document
|
self.lrf = document
|
||||||
self.opts = opts
|
self.opts = opts
|
||||||
@ -48,15 +48,15 @@ class LRFConverter(object):
|
|||||||
self.logger = logger
|
self.logger = logger
|
||||||
logger.info('Parsing LRF...')
|
logger.info('Parsing LRF...')
|
||||||
self.lrf.parse()
|
self.lrf.parse()
|
||||||
|
|
||||||
self.create_metadata()
|
self.create_metadata()
|
||||||
self.create_styles()
|
self.create_styles()
|
||||||
|
|
||||||
def create_metadata(self):
|
def create_metadata(self):
|
||||||
self.logger.info('Reading metadata...')
|
self.logger.info('Reading metadata...')
|
||||||
mi = get_metadata(self.lrf)
|
mi = get_metadata(self.lrf)
|
||||||
self.opf = OPFCreator(self.output_dir, mi)
|
self.opf = OPFCreator(self.output_dir, mi)
|
||||||
|
|
||||||
def create_page_styles(self):
|
def create_page_styles(self):
|
||||||
self.page_css = ''
|
self.page_css = ''
|
||||||
for obj in self.lrf.objects.values():
|
for obj in self.lrf.objects.values():
|
||||||
@ -65,21 +65,21 @@ class LRFConverter(object):
|
|||||||
self.page_css = selector + ' {\n'
|
self.page_css = selector + ' {\n'
|
||||||
# TODO: Headers and footers
|
# TODO: Headers and footers
|
||||||
self.page_css += '}\n'
|
self.page_css += '}\n'
|
||||||
|
|
||||||
|
|
||||||
def create_block_styles(self):
|
def create_block_styles(self):
|
||||||
self.block_css = ''
|
self.block_css = ''
|
||||||
for obj in self.lrf.objects.values():
|
for obj in self.lrf.objects.values():
|
||||||
if isinstance(obj, BlockAttr):
|
if isinstance(obj, BlockAttr):
|
||||||
self.block_css += str(BlockStyle(obj))
|
self.block_css += str(BlockStyle(obj))
|
||||||
|
|
||||||
def create_text_styles(self):
|
def create_text_styles(self):
|
||||||
self.text_css = ''
|
self.text_css = ''
|
||||||
for obj in self.lrf.objects.values():
|
for obj in self.lrf.objects.values():
|
||||||
if isinstance(obj, TextAttr):
|
if isinstance(obj, TextAttr):
|
||||||
self.text_css += str(TextStyle(obj))
|
self.text_css += str(TextStyle(obj))
|
||||||
print self.text_css
|
print self.text_css
|
||||||
|
|
||||||
def create_styles(self):
|
def create_styles(self):
|
||||||
self.logger.info('Creating CSS stylesheet...')
|
self.logger.info('Creating CSS stylesheet...')
|
||||||
self.create_page_styles()
|
self.create_page_styles()
|
||||||
@ -104,9 +104,9 @@ def process_file(lrfpath, opts, logger=None):
|
|||||||
raise ConversionError(opts.out + ' is not a directory')
|
raise ConversionError(opts.out + ' is not a directory')
|
||||||
if not os.path.exists(opts.out):
|
if not os.path.exists(opts.out):
|
||||||
os.makedirs(opts.out)
|
os.makedirs(opts.out)
|
||||||
|
|
||||||
document = LRFDocument(open(lrfpath, 'rb'))
|
document = LRFDocument(open(lrfpath, 'rb'))
|
||||||
conv = LRFConverter(document, opts, logger)
|
conv = LRFConverter(document, opts, logger)
|
||||||
|
|
||||||
|
|
||||||
def main(args=sys.argv):
|
def main(args=sys.argv):
|
||||||
@ -116,7 +116,7 @@ def main(args=sys.argv):
|
|||||||
parser.print_help()
|
parser.print_help()
|
||||||
return 1
|
return 1
|
||||||
process_file(args[1], opts)
|
process_file(args[1], opts)
|
||||||
|
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
@ -11,23 +11,23 @@ def ceil(num):
|
|||||||
return int(math.ceil(num))
|
return int(math.ceil(num))
|
||||||
|
|
||||||
def print_xml(elem):
|
def print_xml(elem):
|
||||||
from calibre.ebooks.lrf.pylrs.pylrs import ElementWriter
|
from calibre.ebooks.lrf.pylrs.pylrs import ElementWriter
|
||||||
elem = elem.toElement('utf8')
|
elem = elem.toElement('utf8')
|
||||||
ew = ElementWriter(elem, sourceEncoding='utf8')
|
ew = ElementWriter(elem, sourceEncoding='utf8')
|
||||||
ew.write(sys.stdout)
|
ew.write(sys.stdout)
|
||||||
print
|
print
|
||||||
|
|
||||||
def cattrs(base, extra):
|
def cattrs(base, extra):
|
||||||
new = base.copy()
|
new = base.copy()
|
||||||
new.update(extra)
|
new.update(extra)
|
||||||
return new
|
return new
|
||||||
|
|
||||||
def tokens(tb):
|
def tokens(tb):
|
||||||
'''
|
'''
|
||||||
Return the next token. A token is :
|
Return the next token. A token is :
|
||||||
1. A string
|
1. A string
|
||||||
a block of text that has the same style
|
a block of text that has the same style
|
||||||
'''
|
'''
|
||||||
def process_element(x, attrs):
|
def process_element(x, attrs):
|
||||||
if isinstance(x, CR):
|
if isinstance(x, CR):
|
||||||
yield 2, None
|
yield 2, None
|
||||||
@ -49,22 +49,22 @@ def tokens(tb):
|
|||||||
for y in x.contents:
|
for y in x.contents:
|
||||||
for z in process_element(y, attrs):
|
for z in process_element(y, attrs):
|
||||||
yield z
|
yield z
|
||||||
|
|
||||||
|
|
||||||
for i in tb.contents:
|
for i in tb.contents:
|
||||||
if isinstance(i, CR):
|
if isinstance(i, CR):
|
||||||
yield 1, None
|
yield 1, None
|
||||||
elif isinstance(i, Paragraph):
|
elif isinstance(i, Paragraph):
|
||||||
for j in i.contents:
|
for j in i.contents:
|
||||||
attrs = {}
|
attrs = {}
|
||||||
if hasattr(j, 'attrs'):
|
if hasattr(j, 'attrs'):
|
||||||
attrs = j.attrs
|
attrs = j.attrs
|
||||||
for k in process_element(j, attrs):
|
for k in process_element(j, attrs):
|
||||||
yield k
|
yield k
|
||||||
|
|
||||||
|
|
||||||
class Cell(object):
|
class Cell(object):
|
||||||
|
|
||||||
def __init__(self, conv, tag, css):
|
def __init__(self, conv, tag, css):
|
||||||
self.conv = conv
|
self.conv = conv
|
||||||
self.tag = tag
|
self.tag = tag
|
||||||
@ -89,7 +89,7 @@ class Cell(object):
|
|||||||
self.rowspan = int(tag['rowspan']) if tag.has_key('rowspan') else 1
|
self.rowspan = int(tag['rowspan']) if tag.has_key('rowspan') else 1
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
pp = conv.current_page
|
pp = conv.current_page
|
||||||
conv.book.allow_new_page = False
|
conv.book.allow_new_page = False
|
||||||
conv.current_page = conv.book.create_page()
|
conv.current_page = conv.book.create_page()
|
||||||
@ -99,7 +99,7 @@ class Cell(object):
|
|||||||
if isinstance(item, TextBlock):
|
if isinstance(item, TextBlock):
|
||||||
self.text_blocks.append(item)
|
self.text_blocks.append(item)
|
||||||
conv.current_page = pp
|
conv.current_page = pp
|
||||||
conv.book.allow_new_page = True
|
conv.book.allow_new_page = True
|
||||||
if not self.text_blocks:
|
if not self.text_blocks:
|
||||||
tb = conv.book.create_text_block()
|
tb = conv.book.create_text_block()
|
||||||
tb.Paragraph(' ')
|
tb.Paragraph(' ')
|
||||||
@ -107,7 +107,7 @@ class Cell(object):
|
|||||||
for tb in self.text_blocks:
|
for tb in self.text_blocks:
|
||||||
tb.parent = None
|
tb.parent = None
|
||||||
tb.objId = 0
|
tb.objId = 0
|
||||||
# Needed as we have to eventually change this BlockStyle's width and
|
# Needed as we have to eventually change this BlockStyle's width and
|
||||||
# height attributes. This blockstyle may be shared with other
|
# height attributes. This blockstyle may be shared with other
|
||||||
# elements, so doing that causes havoc.
|
# elements, so doing that causes havoc.
|
||||||
tb.blockStyle = conv.book.create_block_style()
|
tb.blockStyle = conv.book.create_block_style()
|
||||||
@ -117,17 +117,17 @@ class Cell(object):
|
|||||||
if ts.attrs['align'] == 'foot':
|
if ts.attrs['align'] == 'foot':
|
||||||
if isinstance(tb.contents[-1], Paragraph):
|
if isinstance(tb.contents[-1], Paragraph):
|
||||||
tb.contents[-1].append(' ')
|
tb.contents[-1].append(' ')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def pts_to_pixels(self, pts):
|
def pts_to_pixels(self, pts):
|
||||||
pts = int(pts)
|
pts = int(pts)
|
||||||
return ceil((float(self.conv.profile.dpi)/72.)*(pts/10.))
|
return ceil((float(self.conv.profile.dpi)/72.)*(pts/10.))
|
||||||
|
|
||||||
def minimum_width(self):
|
def minimum_width(self):
|
||||||
return max([self.minimum_tb_width(tb) for tb in self.text_blocks])
|
return max([self.minimum_tb_width(tb) for tb in self.text_blocks])
|
||||||
|
|
||||||
def minimum_tb_width(self, tb):
|
def minimum_tb_width(self, tb):
|
||||||
ts = tb.textStyle.attrs
|
ts = tb.textStyle.attrs
|
||||||
default_font = get_font(ts['fontfacename'], self.pts_to_pixels(ts['fontsize']))
|
default_font = get_font(ts['fontfacename'], self.pts_to_pixels(ts['fontsize']))
|
||||||
@ -135,7 +135,7 @@ class Cell(object):
|
|||||||
mwidth = 0
|
mwidth = 0
|
||||||
for token, attrs in tokens(tb):
|
for token, attrs in tokens(tb):
|
||||||
font = default_font
|
font = default_font
|
||||||
if isinstance(token, int): # Handle para and line breaks
|
if isinstance(token, int): # Handle para and line breaks
|
||||||
continue
|
continue
|
||||||
if isinstance(token, Plot):
|
if isinstance(token, Plot):
|
||||||
return self.pts_to_pixels(token.xsize)
|
return self.pts_to_pixels(token.xsize)
|
||||||
@ -151,24 +151,24 @@ class Cell(object):
|
|||||||
if width > mwidth:
|
if width > mwidth:
|
||||||
mwidth = width
|
mwidth = width
|
||||||
return parindent + mwidth + 2
|
return parindent + mwidth + 2
|
||||||
|
|
||||||
def text_block_size(self, tb, maxwidth=sys.maxint, debug=False):
|
def text_block_size(self, tb, maxwidth=sys.maxint, debug=False):
|
||||||
ts = tb.textStyle.attrs
|
ts = tb.textStyle.attrs
|
||||||
default_font = get_font(ts['fontfacename'], self.pts_to_pixels(ts['fontsize']))
|
default_font = get_font(ts['fontfacename'], self.pts_to_pixels(ts['fontsize']))
|
||||||
parindent = self.pts_to_pixels(ts['parindent'])
|
parindent = self.pts_to_pixels(ts['parindent'])
|
||||||
top, bottom, left, right = 0, 0, parindent, parindent
|
top, bottom, left, right = 0, 0, parindent, parindent
|
||||||
|
|
||||||
def add_word(width, height, left, right, top, bottom, ls, ws):
|
def add_word(width, height, left, right, top, bottom, ls, ws):
|
||||||
if left + width > maxwidth:
|
if left + width > maxwidth:
|
||||||
left = width + ws
|
left = width + ws
|
||||||
top += ls
|
top += ls
|
||||||
bottom = top+ls if top+ls > bottom else bottom
|
bottom = top+ls if top+ls > bottom else bottom
|
||||||
else:
|
else:
|
||||||
left += (width + ws)
|
left += (width + ws)
|
||||||
right = left if left > right else right
|
right = left if left > right else right
|
||||||
bottom = top+ls if top+ls > bottom else bottom
|
bottom = top+ls if top+ls > bottom else bottom
|
||||||
return left, right, top, bottom
|
return left, right, top, bottom
|
||||||
|
|
||||||
for token, attrs in tokens(tb):
|
for token, attrs in tokens(tb):
|
||||||
if attrs == None:
|
if attrs == None:
|
||||||
attrs = {}
|
attrs = {}
|
||||||
@ -196,17 +196,17 @@ class Cell(object):
|
|||||||
width, height = font.getsize(word)
|
width, height = font.getsize(word)
|
||||||
left, right, top, bottom = add_word(width, height, left, right, top, bottom, ls, ws)
|
left, right, top, bottom = add_word(width, height, left, right, top, bottom, ls, ws)
|
||||||
return right+3+max(parindent, 10), bottom
|
return right+3+max(parindent, 10), bottom
|
||||||
|
|
||||||
def text_block_preferred_width(self, tb, debug=False):
|
def text_block_preferred_width(self, tb, debug=False):
|
||||||
return self.text_block_size(tb, sys.maxint, debug=debug)[0]
|
return self.text_block_size(tb, sys.maxint, debug=debug)[0]
|
||||||
|
|
||||||
def preferred_width(self, debug=False):
|
def preferred_width(self, debug=False):
|
||||||
return ceil(max([self.text_block_preferred_width(i, debug=debug) for i in self.text_blocks]))
|
return ceil(max([self.text_block_preferred_width(i, debug=debug) for i in self.text_blocks]))
|
||||||
|
|
||||||
def height(self, width):
|
def height(self, width):
|
||||||
return sum([self.text_block_size(i, width)[1] for i in self.text_blocks])
|
return sum([self.text_block_size(i, width)[1] for i in self.text_blocks])
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class Row(object):
|
class Row(object):
|
||||||
def __init__(self, conv, row, css, colpad):
|
def __init__(self, conv, row, css, colpad):
|
||||||
@ -221,15 +221,15 @@ class Row(object):
|
|||||||
name = a['name'] if a.has_key('name') else a['id'] if a.has_key('id') else None
|
name = a['name'] if a.has_key('name') else a['id'] if a.has_key('id') else None
|
||||||
if name is not None:
|
if name is not None:
|
||||||
self.targets.append(name.replace('#', ''))
|
self.targets.append(name.replace('#', ''))
|
||||||
|
|
||||||
|
|
||||||
def number_of_cells(self):
|
def number_of_cells(self):
|
||||||
'''Number of cells in this row. Respects colspan'''
|
'''Number of cells in this row. Respects colspan'''
|
||||||
ans = 0
|
ans = 0
|
||||||
for cell in self.cells:
|
for cell in self.cells:
|
||||||
ans += cell.colspan
|
ans += cell.colspan
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
def height(self, widths):
|
def height(self, widths):
|
||||||
i, heights = 0, []
|
i, heights = 0, []
|
||||||
for cell in self.cells:
|
for cell in self.cells:
|
||||||
@ -239,11 +239,11 @@ class Row(object):
|
|||||||
if not heights:
|
if not heights:
|
||||||
return 0
|
return 0
|
||||||
return max(heights)
|
return max(heights)
|
||||||
|
|
||||||
def cell_from_index(self, col):
|
def cell_from_index(self, col):
|
||||||
i = -1
|
i = -1
|
||||||
cell = None
|
cell = None
|
||||||
for cell in self.cells:
|
for cell in self.cells:
|
||||||
for k in range(0, cell.colspan):
|
for k in range(0, cell.colspan):
|
||||||
if i == col:
|
if i == col:
|
||||||
break
|
break
|
||||||
@ -251,30 +251,30 @@ class Row(object):
|
|||||||
if i == col:
|
if i == col:
|
||||||
break
|
break
|
||||||
return cell
|
return cell
|
||||||
|
|
||||||
def minimum_width(self, col):
|
def minimum_width(self, col):
|
||||||
cell = self.cell_from_index(col)
|
cell = self.cell_from_index(col)
|
||||||
if not cell:
|
if not cell:
|
||||||
return 0
|
return 0
|
||||||
return cell.minimum_width()
|
return cell.minimum_width()
|
||||||
|
|
||||||
def preferred_width(self, col):
|
def preferred_width(self, col):
|
||||||
cell = self.cell_from_index(col)
|
cell = self.cell_from_index(col)
|
||||||
if not cell:
|
if not cell:
|
||||||
return 0
|
return 0
|
||||||
return 0 if cell.colspan > 1 else cell.preferred_width()
|
return 0 if cell.colspan > 1 else cell.preferred_width()
|
||||||
|
|
||||||
def width_percent(self, col):
|
def width_percent(self, col):
|
||||||
cell = self.cell_from_index(col)
|
cell = self.cell_from_index(col)
|
||||||
if not cell:
|
if not cell:
|
||||||
return -1
|
return -1
|
||||||
return -1 if cell.colspan > 1 else cell.pwidth
|
return -1 if cell.colspan > 1 else cell.pwidth
|
||||||
|
|
||||||
def cell_iterator(self):
|
def cell_iterator(self):
|
||||||
for c in self.cells:
|
for c in self.cells:
|
||||||
yield c
|
yield c
|
||||||
|
|
||||||
|
|
||||||
class Table(object):
|
class Table(object):
|
||||||
def __init__(self, conv, table, css, rowpad=10, colpad=10):
|
def __init__(self, conv, table, css, rowpad=10, colpad=10):
|
||||||
self.rows = []
|
self.rows = []
|
||||||
@ -283,31 +283,31 @@ class Table(object):
|
|||||||
self.colpad = colpad
|
self.colpad = colpad
|
||||||
rows = table.findAll('tr')
|
rows = table.findAll('tr')
|
||||||
conv.in_table = True
|
conv.in_table = True
|
||||||
for row in rows:
|
for row in rows:
|
||||||
rcss = conv.tag_css(row, css)[0]
|
rcss = conv.tag_css(row, css)[0]
|
||||||
self.rows.append(Row(conv, row, rcss, colpad))
|
self.rows.append(Row(conv, row, rcss, colpad))
|
||||||
conv.in_table = False
|
conv.in_table = False
|
||||||
|
|
||||||
def number_of_columns(self):
|
def number_of_columns(self):
|
||||||
max = 0
|
max = 0
|
||||||
for row in self.rows:
|
for row in self.rows:
|
||||||
max = row.number_of_cells() if row.number_of_cells() > max else max
|
max = row.number_of_cells() if row.number_of_cells() > max else max
|
||||||
return max
|
return max
|
||||||
|
|
||||||
def number_or_rows(self):
|
def number_or_rows(self):
|
||||||
return len(self.rows)
|
return len(self.rows)
|
||||||
|
|
||||||
def height(self, maxwidth):
|
def height(self, maxwidth):
|
||||||
''' Return row heights + self.rowpad'''
|
''' Return row heights + self.rowpad'''
|
||||||
widths = self.get_widths(maxwidth)
|
widths = self.get_widths(maxwidth)
|
||||||
return sum([row.height(widths) + self.rowpad for row in self.rows]) - self.rowpad
|
return sum([row.height(widths) + self.rowpad for row in self.rows]) - self.rowpad
|
||||||
|
|
||||||
def minimum_width(self, col):
|
def minimum_width(self, col):
|
||||||
return max([row.minimum_width(col) for row in self.rows])
|
return max([row.minimum_width(col) for row in self.rows])
|
||||||
|
|
||||||
def width_percent(self, col):
|
def width_percent(self, col):
|
||||||
return max([row.width_percent(col) for row in self.rows])
|
return max([row.width_percent(col) for row in self.rows])
|
||||||
|
|
||||||
def get_widths(self, maxwidth):
|
def get_widths(self, maxwidth):
|
||||||
'''
|
'''
|
||||||
Return widths of columns + self.colpad
|
Return widths of columns + self.colpad
|
||||||
@ -320,29 +320,29 @@ class Table(object):
|
|||||||
try:
|
try:
|
||||||
cellwidths[r] = self.rows[r].preferred_width(c)
|
cellwidths[r] = self.rows[r].preferred_width(c)
|
||||||
except IndexError:
|
except IndexError:
|
||||||
continue
|
continue
|
||||||
widths[c] = max(cellwidths)
|
widths[c] = max(cellwidths)
|
||||||
|
|
||||||
min_widths = [self.minimum_width(i)+10 for i in xrange(cols)]
|
min_widths = [self.minimum_width(i)+10 for i in xrange(cols)]
|
||||||
for i in xrange(len(widths)):
|
for i in xrange(len(widths)):
|
||||||
wp = self.width_percent(i)
|
wp = self.width_percent(i)
|
||||||
if wp >= 0.:
|
if wp >= 0.:
|
||||||
widths[i] = max(min_widths[i], ceil((wp/100.) * (maxwidth - (cols-1)*self.colpad)))
|
widths[i] = max(min_widths[i], ceil((wp/100.) * (maxwidth - (cols-1)*self.colpad)))
|
||||||
|
|
||||||
|
|
||||||
itercount = 0
|
itercount = 0
|
||||||
|
|
||||||
while sum(widths) > maxwidth-((len(widths)-1)*self.colpad) and itercount < 100:
|
while sum(widths) > maxwidth-((len(widths)-1)*self.colpad) and itercount < 100:
|
||||||
for i in range(cols):
|
for i in range(cols):
|
||||||
widths[i] = ceil((95./100.)*widths[i]) if \
|
widths[i] = ceil((95./100.)*widths[i]) if \
|
||||||
ceil((95./100.)*widths[i]) >= min_widths[i] else widths[i]
|
ceil((95./100.)*widths[i]) >= min_widths[i] else widths[i]
|
||||||
itercount += 1
|
itercount += 1
|
||||||
|
|
||||||
return [i+self.colpad for i in widths]
|
return [i+self.colpad for i in widths]
|
||||||
|
|
||||||
def blocks(self, maxwidth, maxheight):
|
def blocks(self, maxwidth, maxheight):
|
||||||
rows, cols = self.number_or_rows(), self.number_of_columns()
|
rows, cols = self.number_or_rows(), self.number_of_columns()
|
||||||
cellmatrix = [[None for c in range(cols)] for r in range(rows)]
|
cellmatrix = [[None for c in range(cols)] for r in range(rows)]
|
||||||
rowpos = [0 for i in range(rows)]
|
rowpos = [0 for i in range(rows)]
|
||||||
for r in range(rows):
|
for r in range(rows):
|
||||||
nc = self.rows[r].cell_iterator()
|
nc = self.rows[r].cell_iterator()
|
||||||
@ -358,14 +358,14 @@ class Table(object):
|
|||||||
break
|
break
|
||||||
except StopIteration: # No more cells in this row
|
except StopIteration: # No more cells in this row
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|
||||||
widths = self.get_widths(maxwidth)
|
widths = self.get_widths(maxwidth)
|
||||||
heights = [row.height(widths) for row in self.rows]
|
heights = [row.height(widths) for row in self.rows]
|
||||||
|
|
||||||
xpos = [sum(widths[:i]) for i in range(cols)]
|
xpos = [sum(widths[:i]) for i in range(cols)]
|
||||||
delta = maxwidth - sum(widths)
|
delta = maxwidth - sum(widths)
|
||||||
if delta < 0:
|
if delta < 0:
|
||||||
delta = 0
|
delta = 0
|
||||||
for r in range(len(cellmatrix)):
|
for r in range(len(cellmatrix)):
|
||||||
yield None, 0, heights[r], 0, self.rows[r].targets
|
yield None, 0, heights[r], 0, self.rows[r].targets
|
||||||
@ -377,13 +377,13 @@ class Table(object):
|
|||||||
sypos = 0
|
sypos = 0
|
||||||
for tb in cell.text_blocks:
|
for tb in cell.text_blocks:
|
||||||
tb.blockStyle = self.conv.book.create_block_style(
|
tb.blockStyle = self.conv.book.create_block_style(
|
||||||
blockwidth=width,
|
blockwidth=width,
|
||||||
blockheight=cell.text_block_size(tb, width)[1],
|
blockheight=cell.text_block_size(tb, width)[1],
|
||||||
blockrule='horz-fixed')
|
blockrule='horz-fixed')
|
||||||
|
|
||||||
yield tb, xpos[c], sypos, delta, None
|
yield tb, xpos[c], sypos, delta, None
|
||||||
sypos += tb.blockStyle.attrs['blockheight']
|
sypos += tb.blockStyle.attrs['blockheight']
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,81 +1,81 @@
|
|||||||
""" elements.py -- replacements and helpers for ElementTree """
|
""" elements.py -- replacements and helpers for ElementTree """
|
||||||
|
|
||||||
class ElementWriter(object):
|
class ElementWriter(object):
|
||||||
def __init__(self, e, header=False, sourceEncoding="ascii",
|
def __init__(self, e, header=False, sourceEncoding="ascii",
|
||||||
spaceBeforeClose=True, outputEncodingName="UTF-16"):
|
spaceBeforeClose=True, outputEncodingName="UTF-16"):
|
||||||
self.header = header
|
self.header = header
|
||||||
self.e = e
|
self.e = e
|
||||||
self.sourceEncoding=sourceEncoding
|
self.sourceEncoding=sourceEncoding
|
||||||
self.spaceBeforeClose = spaceBeforeClose
|
self.spaceBeforeClose = spaceBeforeClose
|
||||||
self.outputEncodingName = outputEncodingName
|
self.outputEncodingName = outputEncodingName
|
||||||
|
|
||||||
|
|
||||||
def _encodeCdata(self, rawText):
|
def _encodeCdata(self, rawText):
|
||||||
if type(rawText) is str:
|
if type(rawText) is str:
|
||||||
rawText = rawText.decode(self.sourceEncoding)
|
rawText = rawText.decode(self.sourceEncoding)
|
||||||
|
|
||||||
text = rawText.replace("&", "&")
|
text = rawText.replace("&", "&")
|
||||||
text = text.replace("<", "<")
|
text = text.replace("<", "<")
|
||||||
text = text.replace(">", ">")
|
text = text.replace(">", ">")
|
||||||
return text
|
return text
|
||||||
|
|
||||||
|
|
||||||
def _writeAttribute(self, f, name, value):
|
def _writeAttribute(self, f, name, value):
|
||||||
f.write(u' %s="' % unicode(name))
|
f.write(u' %s="' % unicode(name))
|
||||||
if not isinstance(value, basestring):
|
if not isinstance(value, basestring):
|
||||||
value = unicode(value)
|
value = unicode(value)
|
||||||
value = self._encodeCdata(value)
|
value = self._encodeCdata(value)
|
||||||
value = value.replace('"', '"')
|
value = value.replace('"', '"')
|
||||||
f.write(value)
|
f.write(value)
|
||||||
f.write(u'"')
|
f.write(u'"')
|
||||||
|
|
||||||
|
|
||||||
def _writeText(self, f, rawText):
|
def _writeText(self, f, rawText):
|
||||||
text = self._encodeCdata(rawText)
|
text = self._encodeCdata(rawText)
|
||||||
f.write(text)
|
f.write(text)
|
||||||
|
|
||||||
|
|
||||||
def _write(self, f, e):
|
def _write(self, f, e):
|
||||||
f.write(u'<' + unicode(e.tag))
|
f.write(u'<' + unicode(e.tag))
|
||||||
|
|
||||||
attributes = e.items()
|
attributes = e.items()
|
||||||
attributes.sort()
|
attributes.sort()
|
||||||
for name, value in attributes:
|
for name, value in attributes:
|
||||||
self._writeAttribute(f, name, value)
|
self._writeAttribute(f, name, value)
|
||||||
|
|
||||||
if e.text is not None or len(e) > 0:
|
if e.text is not None or len(e) > 0:
|
||||||
f.write(u'>')
|
f.write(u'>')
|
||||||
|
|
||||||
if e.text:
|
if e.text:
|
||||||
self._writeText(f, e.text)
|
self._writeText(f, e.text)
|
||||||
|
|
||||||
for e2 in e:
|
for e2 in e:
|
||||||
self._write(f, e2)
|
self._write(f, e2)
|
||||||
|
|
||||||
f.write(u'</%s>' % e.tag)
|
f.write(u'</%s>' % e.tag)
|
||||||
else:
|
else:
|
||||||
if self.spaceBeforeClose:
|
if self.spaceBeforeClose:
|
||||||
f.write(' ')
|
f.write(' ')
|
||||||
f.write(u'/>')
|
f.write(u'/>')
|
||||||
|
|
||||||
if e.tail is not None:
|
if e.tail is not None:
|
||||||
self._writeText(f, e.tail)
|
self._writeText(f, e.tail)
|
||||||
|
|
||||||
|
|
||||||
def toString(self):
|
def toString(self):
|
||||||
class x:
|
class x:
|
||||||
pass
|
pass
|
||||||
buffer = []
|
buffer = []
|
||||||
x.write = buffer.append
|
x.write = buffer.append
|
||||||
self.write(x)
|
self.write(x)
|
||||||
return u''.join(buffer)
|
return u''.join(buffer)
|
||||||
|
|
||||||
|
|
||||||
def write(self, f):
|
def write(self, f):
|
||||||
if self.header:
|
if self.header:
|
||||||
f.write(u'<?xml version="1.0" encoding="%s"?>\n' % self.outputEncodingName)
|
f.write(u'<?xml version="1.0" encoding="%s"?>\n' % self.outputEncodingName)
|
||||||
|
|
||||||
self._write(f, self.e)
|
self._write(f, self.e)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -1,43 +1,43 @@
|
|||||||
def _optimize(tagList, tagName, conversion):
|
def _optimize(tagList, tagName, conversion):
|
||||||
# copy the tag of interest plus any text
|
# copy the tag of interest plus any text
|
||||||
newTagList = []
|
newTagList = []
|
||||||
for tag in tagList:
|
for tag in tagList:
|
||||||
if tag.name == tagName or tag.name == "rawtext":
|
if tag.name == tagName or tag.name == "rawtext":
|
||||||
newTagList.append(tag)
|
newTagList.append(tag)
|
||||||
|
|
||||||
# now, eliminate any duplicates (leaving the last one)
|
# now, eliminate any duplicates (leaving the last one)
|
||||||
for i, newTag in enumerate(newTagList[:-1]):
|
for i, newTag in enumerate(newTagList[:-1]):
|
||||||
if newTag.name == tagName and newTagList[i+1].name == tagName:
|
if newTag.name == tagName and newTagList[i+1].name == tagName:
|
||||||
tagList.remove(newTag)
|
tagList.remove(newTag)
|
||||||
|
|
||||||
# eliminate redundant settings to same value across text strings
|
# eliminate redundant settings to same value across text strings
|
||||||
newTagList = []
|
newTagList = []
|
||||||
for tag in tagList:
|
for tag in tagList:
|
||||||
if tag.name == tagName:
|
if tag.name == tagName:
|
||||||
newTagList.append(tag)
|
newTagList.append(tag)
|
||||||
|
|
||||||
for i, newTag in enumerate(newTagList[:-1]):
|
for i, newTag in enumerate(newTagList[:-1]):
|
||||||
value = conversion(newTag.parameter)
|
value = conversion(newTag.parameter)
|
||||||
nextValue = conversion(newTagList[i+1].parameter)
|
nextValue = conversion(newTagList[i+1].parameter)
|
||||||
if value == nextValue:
|
if value == nextValue:
|
||||||
tagList.remove(newTagList[i+1])
|
tagList.remove(newTagList[i+1])
|
||||||
|
|
||||||
# eliminate any setting that don't have text after them
|
# eliminate any setting that don't have text after them
|
||||||
while len(tagList) > 0 and tagList[-1].name == tagName:
|
while len(tagList) > 0 and tagList[-1].name == tagName:
|
||||||
del tagList[-1]
|
del tagList[-1]
|
||||||
|
|
||||||
|
|
||||||
def tagListOptimizer(tagList):
|
def tagListOptimizer(tagList):
|
||||||
# this function eliminates redundant or unnecessary tags
|
# this function eliminates redundant or unnecessary tags
|
||||||
# it scans a list of tags, looking for text settings that are
|
# it scans a list of tags, looking for text settings that are
|
||||||
# changed before any text is output
|
# changed before any text is output
|
||||||
# for example,
|
# for example,
|
||||||
# fontsize=100, fontsize=200, text, fontsize=100, fontsize=200
|
# fontsize=100, fontsize=200, text, fontsize=100, fontsize=200
|
||||||
# should be:
|
# should be:
|
||||||
# fontsize=200 text
|
# fontsize=200 text
|
||||||
oldSize = len(tagList)
|
oldSize = len(tagList)
|
||||||
_optimize(tagList, "fontsize", int)
|
_optimize(tagList, "fontsize", int)
|
||||||
_optimize(tagList, "fontweight", int)
|
_optimize(tagList, "fontweight", int)
|
||||||
return oldSize - len(tagList)
|
return oldSize - len(tagList)
|
||||||
|
|
||||||
|
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -2,4 +2,6 @@
|
|||||||
# Initialize extensions
|
# Initialize extensions
|
||||||
from calibre.ebooks.markdown import mdx_footnotes
|
from calibre.ebooks.markdown import mdx_footnotes
|
||||||
from calibre.ebooks.markdown import mdx_tables
|
from calibre.ebooks.markdown import mdx_tables
|
||||||
from calibre.ebooks.markdown import mdx_toc
|
from calibre.ebooks.markdown import mdx_toc
|
||||||
|
|
||||||
|
mdx_footnotes, mdx_tables, mdx_toc
|
||||||
|
@ -8,8 +8,6 @@ My markdown extensions for adding:
|
|||||||
Table of Contents (aka toc)
|
Table of Contents (aka toc)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
import re
|
import re
|
||||||
import markdown
|
import markdown
|
||||||
|
|
||||||
@ -18,7 +16,7 @@ DEFAULT_TITLE = None
|
|||||||
def extract_alphanumeric(in_str=None):
|
def extract_alphanumeric(in_str=None):
|
||||||
"""take alpha-numeric (7bit ascii) and return as a string
|
"""take alpha-numeric (7bit ascii) and return as a string
|
||||||
"""
|
"""
|
||||||
# I'm sure this is really inefficient and
|
# I'm sure this is really inefficient and
|
||||||
# could be done with a lambda/map()
|
# could be done with a lambda/map()
|
||||||
#x.strip().title().replace(' ', "")
|
#x.strip().title().replace(' ', "")
|
||||||
out_str=[]
|
out_str=[]
|
||||||
@ -42,7 +40,7 @@ class TocExtension (markdown.Extension):
|
|||||||
toc is returned in a div tag with class='toc'
|
toc is returned in a div tag with class='toc'
|
||||||
toc is either:
|
toc is either:
|
||||||
appended to end of document
|
appended to end of document
|
||||||
OR
|
OR
|
||||||
replaces first string occurence of "///Table of Contents Goes Here///"
|
replaces first string occurence of "///Table of Contents Goes Here///"
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@ -75,7 +73,7 @@ class TocExtension (markdown.Extension):
|
|||||||
"""
|
"""
|
||||||
Creates Table Of Contents based on headers.
|
Creates Table Of Contents based on headers.
|
||||||
|
|
||||||
@returns: toc as a single as a dom element
|
@returns: toc as a single as a dom element
|
||||||
in a <div> tag with class='toc'
|
in a <div> tag with class='toc'
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@ -85,9 +83,9 @@ class TocExtension (markdown.Extension):
|
|||||||
if element.type=='element':
|
if element.type=='element':
|
||||||
if headers_compiled_re.match(element.nodeName):
|
if headers_compiled_re.match(element.nodeName):
|
||||||
return True
|
return True
|
||||||
|
|
||||||
headers_doc_list = doc.find(findHeadersFn)
|
headers_doc_list = doc.find(findHeadersFn)
|
||||||
|
|
||||||
# Insert anchor tags into dom
|
# Insert anchor tags into dom
|
||||||
generated_anchor_id=0
|
generated_anchor_id=0
|
||||||
headers_list=[]
|
headers_list=[]
|
||||||
@ -99,19 +97,19 @@ class TocExtension (markdown.Extension):
|
|||||||
if heading_type == self.auto_toc_heading_type:
|
if heading_type == self.auto_toc_heading_type:
|
||||||
min_header_size_found=min(min_header_size_found,
|
min_header_size_found=min(min_header_size_found,
|
||||||
heading_type)
|
heading_type)
|
||||||
|
|
||||||
html_anchor_name= (extract_alphanumeric(heading_title)
|
html_anchor_name= (extract_alphanumeric(heading_title)
|
||||||
+'__MD_autoTOC_%d' % (generated_anchor_id))
|
+'__MD_autoTOC_%d' % (generated_anchor_id))
|
||||||
|
|
||||||
# insert anchor tag inside header tags
|
# insert anchor tag inside header tags
|
||||||
html_anchor = doc.createElement("a")
|
html_anchor = doc.createElement("a")
|
||||||
html_anchor.setAttribute('name', html_anchor_name)
|
html_anchor.setAttribute('name', html_anchor_name)
|
||||||
element.appendChild(html_anchor)
|
element.appendChild(html_anchor)
|
||||||
|
|
||||||
headers_list.append( (heading_type, heading_title,
|
headers_list.append( (heading_type, heading_title,
|
||||||
html_anchor_name) )
|
html_anchor_name) )
|
||||||
generated_anchor_id = generated_anchor_id + 1
|
generated_anchor_id = generated_anchor_id + 1
|
||||||
|
|
||||||
# create dom for toc
|
# create dom for toc
|
||||||
if headers_list != []:
|
if headers_list != []:
|
||||||
# Create list
|
# Create list
|
||||||
@ -125,9 +123,9 @@ class TocExtension (markdown.Extension):
|
|||||||
toc_doc_link.appendChild(toc_doc_text)
|
toc_doc_link.appendChild(toc_doc_text)
|
||||||
toc_doc_entry.appendChild(toc_doc_link)
|
toc_doc_entry.appendChild(toc_doc_link)
|
||||||
toc_doc_list.appendChild(toc_doc_entry)
|
toc_doc_list.appendChild(toc_doc_entry)
|
||||||
|
|
||||||
|
|
||||||
# Put list into div
|
# Put list into div
|
||||||
div = doc.createElement("div")
|
div = doc.createElement("div")
|
||||||
div.setAttribute('class', 'toc')
|
div.setAttribute('class', 'toc')
|
||||||
if self.TOC_TITLE:
|
if self.TOC_TITLE:
|
||||||
@ -149,7 +147,7 @@ class TocPostprocessor (markdown.Postprocessor):
|
|||||||
|
|
||||||
def run(self, doc):
|
def run(self, doc):
|
||||||
tocPlaceholder = self.toc.findTocPlaceholder(doc)
|
tocPlaceholder = self.toc.findTocPlaceholder(doc)
|
||||||
|
|
||||||
tocDiv = self.toc.createTocDiv(doc)
|
tocDiv = self.toc.createTocDiv(doc)
|
||||||
if tocDiv:
|
if tocDiv:
|
||||||
if tocPlaceholder :
|
if tocPlaceholder :
|
||||||
|
@ -2,7 +2,7 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2008, Ashish Kulkarni <kulkarni.ashish@gmail.com>'
|
__copyright__ = '2008, Ashish Kulkarni <kulkarni.ashish@gmail.com>'
|
||||||
'''Read meta information from IMP files'''
|
'''Read meta information from IMP files'''
|
||||||
|
|
||||||
import sys, os
|
import sys
|
||||||
|
|
||||||
from calibre.ebooks.metadata import MetaInformation, string_to_authors
|
from calibre.ebooks.metadata import MetaInformation, string_to_authors
|
||||||
|
|
||||||
@ -17,7 +17,7 @@ def get_metadata(stream):
|
|||||||
if stream.read(10) not in MAGIC:
|
if stream.read(10) not in MAGIC:
|
||||||
print >>sys.stderr, u'Couldn\'t read IMP header from file'
|
print >>sys.stderr, u'Couldn\'t read IMP header from file'
|
||||||
return mi
|
return mi
|
||||||
|
|
||||||
def cString(skip=0):
|
def cString(skip=0):
|
||||||
result = ''
|
result = ''
|
||||||
while 1:
|
while 1:
|
||||||
@ -30,7 +30,7 @@ def get_metadata(stream):
|
|||||||
|
|
||||||
stream.read(38) # skip past some uninteresting headers
|
stream.read(38) # skip past some uninteresting headers
|
||||||
_, category, title, author = cString(), cString(), cString(1), cString(2)
|
_, category, title, author = cString(), cString(), cString(1), cString(2)
|
||||||
|
|
||||||
if title:
|
if title:
|
||||||
mi.title = title
|
mi.title = title
|
||||||
if author:
|
if author:
|
||||||
|
@ -7,7 +7,7 @@ __docformat__ = 'restructuredtext en'
|
|||||||
Read metadata from LRX files
|
Read metadata from LRX files
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import sys, struct
|
import struct
|
||||||
from zlib import decompress
|
from zlib import decompress
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
|
|
||||||
@ -33,7 +33,7 @@ def short_be(buf):
|
|||||||
def get_metadata(f):
|
def get_metadata(f):
|
||||||
read = lambda at, amount: _read(f, at, amount)
|
read = lambda at, amount: _read(f, at, amount)
|
||||||
f.seek(0)
|
f.seek(0)
|
||||||
buf = f.read(12)
|
buf = f.read(12)
|
||||||
if buf[4:] == 'ftypLRX2':
|
if buf[4:] == 'ftypLRX2':
|
||||||
offset = 0
|
offset = 0
|
||||||
while True:
|
while True:
|
||||||
@ -74,9 +74,9 @@ def get_metadata(f):
|
|||||||
mi.tags = [x.text for x in bi.findall('Category')]
|
mi.tags = [x.text for x in bi.findall('Category')]
|
||||||
mi.language = root.find('DocInfo').find('Language').text
|
mi.language = root.find('DocInfo').find('Language').text
|
||||||
return mi
|
return mi
|
||||||
|
|
||||||
elif buf[4:8] == 'LRX':
|
elif buf[4:8] == 'LRX':
|
||||||
raise ValueError('Librie LRX format not supported')
|
raise ValueError('Librie LRX format not supported')
|
||||||
else:
|
else:
|
||||||
raise ValueError('Not a LRX file')
|
raise ValueError('Not a LRX file')
|
||||||
|
|
||||||
|
@ -17,7 +17,7 @@
|
|||||||
#
|
#
|
||||||
# Contributor(s):
|
# Contributor(s):
|
||||||
#
|
#
|
||||||
import zipfile, sys, re
|
import zipfile, re
|
||||||
import xml.sax.saxutils
|
import xml.sax.saxutils
|
||||||
from cStringIO import StringIO
|
from cStringIO import StringIO
|
||||||
|
|
||||||
@ -46,7 +46,7 @@ fields = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
def normalize(str):
|
def normalize(str):
|
||||||
"""
|
"""
|
||||||
The normalize-space function returns the argument string with whitespace
|
The normalize-space function returns the argument string with whitespace
|
||||||
normalized by stripping leading and trailing whitespace and replacing
|
normalized by stripping leading and trailing whitespace and replacing
|
||||||
sequences of whitespace characters by a single space.
|
sequences of whitespace characters by a single space.
|
||||||
@ -125,7 +125,7 @@ class odfmetaparser(xml.sax.saxutils.XMLGenerator):
|
|||||||
else:
|
else:
|
||||||
texttag = self._tag
|
texttag = self._tag
|
||||||
self.seenfields[texttag] = self.data()
|
self.seenfields[texttag] = self.data()
|
||||||
|
|
||||||
if field in self.deletefields:
|
if field in self.deletefields:
|
||||||
self.output.dowrite = True
|
self.output.dowrite = True
|
||||||
else:
|
else:
|
||||||
@ -140,7 +140,7 @@ class odfmetaparser(xml.sax.saxutils.XMLGenerator):
|
|||||||
|
|
||||||
def data(self):
|
def data(self):
|
||||||
return normalize(''.join(self._data))
|
return normalize(''.join(self._data))
|
||||||
|
|
||||||
def get_metadata(stream):
|
def get_metadata(stream):
|
||||||
zin = zipfile.ZipFile(stream, 'r')
|
zin = zipfile.ZipFile(stream, 'r')
|
||||||
odfs = odfmetaparser()
|
odfs = odfmetaparser()
|
||||||
@ -161,6 +161,6 @@ def get_metadata(stream):
|
|||||||
mi.language = data['language']
|
mi.language = data['language']
|
||||||
if data.get('keywords', ''):
|
if data.get('keywords', ''):
|
||||||
mi.tags = data['keywords'].split(',')
|
mi.tags = data['keywords'].split(',')
|
||||||
|
|
||||||
return mi
|
return mi
|
||||||
|
|
||||||
|
@ -3,8 +3,8 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
|
|
||||||
import os
|
import os
|
||||||
from zipfile import ZipFile
|
from zipfile import ZipFile
|
||||||
from cStringIO import StringIO
|
from cStringIO import StringIO
|
||||||
|
|
||||||
|
|
||||||
def get_metadata(stream):
|
def get_metadata(stream):
|
||||||
@ -20,5 +20,5 @@ def get_metadata(stream):
|
|||||||
stream = StringIO(zf.read(f))
|
stream = StringIO(zf.read(f))
|
||||||
return get_metadata(stream, stream_type)
|
return get_metadata(stream, stream_type)
|
||||||
raise ValueError('No ebook found in ZIP archive')
|
raise ValueError('No ebook found in ZIP archive')
|
||||||
|
|
||||||
|
|
||||||
|
@ -3,7 +3,6 @@
|
|||||||
'''
|
'''
|
||||||
Writer content to palmdoc pdb file.
|
Writer content to palmdoc pdb file.
|
||||||
'''
|
'''
|
||||||
import os
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
|
@ -4,7 +4,6 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import os
|
|
||||||
|
|
||||||
class zTXTError(Exception):
|
class zTXTError(Exception):
|
||||||
pass
|
pass
|
||||||
|
@ -12,8 +12,6 @@ Decrypt content of PDF.
|
|||||||
import os, sys
|
import os, sys
|
||||||
from optparse import OptionGroup, Option
|
from optparse import OptionGroup, Option
|
||||||
|
|
||||||
from calibre.ebooks.metadata.meta import metadata_from_formats
|
|
||||||
from calibre.ebooks.metadata import authors_to_string
|
|
||||||
from calibre.utils.config import OptionParser
|
from calibre.utils.config import OptionParser
|
||||||
from calibre.utils.logging import Log
|
from calibre.utils.logging import Log
|
||||||
from calibre.constants import preferred_encoding
|
from calibre.constants import preferred_encoding
|
||||||
@ -36,8 +34,8 @@ OPTIONS = set([
|
|||||||
|
|
||||||
class DecryptionError(Exception):
|
class DecryptionError(Exception):
|
||||||
def __init__(self, pdf_path):
|
def __init__(self, pdf_path):
|
||||||
self.value = 'Unable to decrypt file `%s`.' % value
|
self.value = 'Unable to decrypt file `%s`.' % pdf_path
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return repr(self.value)
|
return repr(self.value)
|
||||||
|
|
||||||
@ -62,20 +60,20 @@ def add_options(parser):
|
|||||||
group = OptionGroup(parser, _('Decrypt Options:'), _('Options to control the transformation of pdf'))
|
group = OptionGroup(parser, _('Decrypt Options:'), _('Options to control the transformation of pdf'))
|
||||||
parser.add_option_group(group)
|
parser.add_option_group(group)
|
||||||
add_option = group.add_option
|
add_option = group.add_option
|
||||||
|
|
||||||
for rec in OPTIONS:
|
for rec in OPTIONS:
|
||||||
option_recommendation_to_cli_option(add_option, rec)
|
option_recommendation_to_cli_option(add_option, rec)
|
||||||
|
|
||||||
def decrypt(pdf_path, out_path, password):
|
def decrypt(pdf_path, out_path, password):
|
||||||
pdf = PdfFileReader(open(os.path.abspath(pdf_path), 'rb'))
|
pdf = PdfFileReader(open(os.path.abspath(pdf_path), 'rb'))
|
||||||
|
|
||||||
if pdf.decrypt(str(password)) == 0:
|
if pdf.decrypt(str(password)) == 0:
|
||||||
raise DecryptionError(pdf_path)
|
raise DecryptionError(pdf_path)
|
||||||
|
|
||||||
title = pdf.documentInfo.title if pdf.documentInfo.title else _('Unknown')
|
title = pdf.documentInfo.title if pdf.documentInfo.title else _('Unknown')
|
||||||
author = pdf.documentInfo.author if pdf.documentInfo.author else _('Unknown')
|
author = pdf.documentInfo.author if pdf.documentInfo.author else _('Unknown')
|
||||||
out_pdf = PdfFileWriter(title=title, author=author)
|
out_pdf = PdfFileWriter(title=title, author=author)
|
||||||
|
|
||||||
for page in pdf.pages:
|
for page in pdf.pages:
|
||||||
out_pdf.addPage(page)
|
out_pdf.addPage(page)
|
||||||
|
|
||||||
@ -86,23 +84,23 @@ def main(args=sys.argv, name=''):
|
|||||||
log = Log()
|
log = Log()
|
||||||
parser = option_parser(name)
|
parser = option_parser(name)
|
||||||
add_options(parser)
|
add_options(parser)
|
||||||
|
|
||||||
opts, args = parser.parse_args(args)
|
opts, args = parser.parse_args(args)
|
||||||
args = args[1:]
|
args = args[1:]
|
||||||
|
|
||||||
if len(args) < 2:
|
if len(args) < 2:
|
||||||
print 'Error: A PDF file and decryption password is required.\n'
|
print 'Error: A PDF file and decryption password is required.\n'
|
||||||
print_help(parser, log)
|
print_help(parser, log)
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
if not is_valid_pdf(args[0]):
|
if not is_valid_pdf(args[0]):
|
||||||
print 'Error: Could not read file `%s`.' % args[0]
|
print 'Error: Could not read file `%s`.' % args[0]
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
if not is_encrypted(args[0]):
|
if not is_encrypted(args[0]):
|
||||||
print 'Error: file `%s` is not encrypted.' % args[0]
|
print 'Error: file `%s` is not encrypted.' % args[0]
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
try:
|
try:
|
||||||
decrypt(args[0], opts.output, args[1])
|
decrypt(args[0], opts.output, args[1])
|
||||||
except DecryptionError, e:
|
except DecryptionError, e:
|
||||||
|
@ -17,6 +17,8 @@ from calibre.utils.logging import Log
|
|||||||
from calibre.constants import preferred_encoding
|
from calibre.constants import preferred_encoding
|
||||||
from calibre.customize.conversion import OptionRecommendation
|
from calibre.customize.conversion import OptionRecommendation
|
||||||
from calibre.ebooks.pdf.verify import is_valid_pdf, is_encrypted
|
from calibre.ebooks.pdf.verify import is_valid_pdf, is_encrypted
|
||||||
|
from calibre.ebooks.metadata import authors_to_string
|
||||||
|
from calibre.ebooks.metadata.meta import metadata_from_formats
|
||||||
|
|
||||||
from pyPdf import PdfFileWriter, PdfFileReader
|
from pyPdf import PdfFileWriter, PdfFileReader
|
||||||
|
|
||||||
@ -52,7 +54,7 @@ def add_options(parser):
|
|||||||
group = OptionGroup(parser, _('Encrypt Options:'), _('Options to control the transformation of pdf'))
|
group = OptionGroup(parser, _('Encrypt Options:'), _('Options to control the transformation of pdf'))
|
||||||
parser.add_option_group(group)
|
parser.add_option_group(group)
|
||||||
add_option = group.add_option
|
add_option = group.add_option
|
||||||
|
|
||||||
for rec in OPTIONS:
|
for rec in OPTIONS:
|
||||||
option_recommendation_to_cli_option(add_option, rec)
|
option_recommendation_to_cli_option(add_option, rec)
|
||||||
|
|
||||||
@ -78,23 +80,23 @@ def main(args=sys.argv, name=''):
|
|||||||
log = Log()
|
log = Log()
|
||||||
parser = option_parser(name)
|
parser = option_parser(name)
|
||||||
add_options(parser)
|
add_options(parser)
|
||||||
|
|
||||||
opts, args = parser.parse_args(args)
|
opts, args = parser.parse_args(args)
|
||||||
args = args[1:]
|
args = args[1:]
|
||||||
|
|
||||||
if len(args) < 2:
|
if len(args) < 2:
|
||||||
print 'Error: A PDF file and decryption password is required.\n'
|
print 'Error: A PDF file and decryption password is required.\n'
|
||||||
print_help(parser, log)
|
print_help(parser, log)
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
if not is_valid_pdf(args[0]):
|
if not is_valid_pdf(args[0]):
|
||||||
print 'Error: Could not read file `%s`.' % args[0]
|
print 'Error: Could not read file `%s`.' % args[0]
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
if is_encrypted(args[0]):
|
if is_encrypted(args[0]):
|
||||||
print 'Error: file `%s` is already encrypted.' % args[0]
|
print 'Error: file `%s` is already encrypted.' % args[0]
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
mi = metadata_from_formats([args[0]])
|
mi = metadata_from_formats([args[0]])
|
||||||
|
|
||||||
encrypt(args[0], opts.output, args[1], mi)
|
encrypt(args[0], opts.output, args[1], mi)
|
||||||
|
@ -11,25 +11,25 @@ Verify PDF files.
|
|||||||
|
|
||||||
import os
|
import os
|
||||||
|
|
||||||
from pyPdf import PdfFileWriter, PdfFileReader
|
from pyPdf import PdfFileReader
|
||||||
|
|
||||||
def is_valid_pdf(pdf_path):
|
def is_valid_pdf(pdf_path):
|
||||||
'''
|
'''
|
||||||
Returns True if the pdf file is valid.
|
Returns True if the pdf file is valid.
|
||||||
'''
|
'''
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
|
with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
|
||||||
pdf = PdfFileReader(pdf_file)
|
pdf = PdfFileReader(pdf_file)
|
||||||
except:
|
except:
|
||||||
return False
|
return False
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def is_valid_pdfs(pdf_paths):
|
def is_valid_pdfs(pdf_paths):
|
||||||
'''
|
'''
|
||||||
Returns a list of invalid pdf files.
|
Returns a list of invalid pdf files.
|
||||||
'''
|
'''
|
||||||
|
|
||||||
invalid = []
|
invalid = []
|
||||||
for pdf_path in pdf_paths:
|
for pdf_path in pdf_paths:
|
||||||
if not is_valid_pdf(pdf_path):
|
if not is_valid_pdf(pdf_path):
|
||||||
|
@ -4,7 +4,6 @@ __license__ = 'GPL 3'
|
|||||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import os
|
|
||||||
import struct
|
import struct
|
||||||
import zlib
|
import zlib
|
||||||
|
|
||||||
|
@ -15,7 +15,7 @@
|
|||||||
# #
|
# #
|
||||||
# #
|
# #
|
||||||
#########################################################################
|
#########################################################################
|
||||||
import sys, os, shutil
|
import os, shutil
|
||||||
|
|
||||||
class Copy:
|
class Copy:
|
||||||
"""Copy each changed file to a directory for debugging purposes"""
|
"""Copy each changed file to a directory for debugging purposes"""
|
||||||
@ -66,6 +66,6 @@ class Copy:
|
|||||||
"""
|
"""
|
||||||
write_file = os.path.join(Copy.__dir,new_file)
|
write_file = os.path.join(Copy.__dir,new_file)
|
||||||
shutil.copyfile(file, write_file)
|
shutil.copyfile(file, write_file)
|
||||||
|
|
||||||
def rename(self, source, dest):
|
def rename(self, source, dest):
|
||||||
shutil.copyfile(source, dest)
|
shutil.copyfile(source, dest)
|
||||||
|
@ -1,5 +1,4 @@
|
|||||||
import sys
|
import sys
|
||||||
from calibre.ebooks import rtf2xml
|
|
||||||
class ParseOptions:
|
class ParseOptions:
|
||||||
"""
|
"""
|
||||||
Requires:
|
Requires:
|
||||||
|
@ -16,7 +16,6 @@
|
|||||||
# #
|
# #
|
||||||
#########################################################################
|
#########################################################################
|
||||||
import sys, os, codecs
|
import sys, os, codecs
|
||||||
from calibre.ebooks import rtf2xml
|
|
||||||
class Output:
|
class Output:
|
||||||
"""
|
"""
|
||||||
Output file
|
Output file
|
||||||
|
@ -15,8 +15,6 @@
|
|||||||
# #
|
# #
|
||||||
# #
|
# #
|
||||||
#########################################################################
|
#########################################################################
|
||||||
import sys,os
|
|
||||||
from calibre.ebooks import rtf2xml
|
|
||||||
class OverrideTable:
|
class OverrideTable:
|
||||||
"""
|
"""
|
||||||
Parse a line of text to make the override table. Return a string
|
Parse a line of text to make the override table. Return a string
|
||||||
|
@ -7,21 +7,19 @@ from calibre.gui2 import file_icon_provider
|
|||||||
from calibre.gui2.dialogs.choose_format_ui import Ui_ChooseFormatDialog
|
from calibre.gui2.dialogs.choose_format_ui import Ui_ChooseFormatDialog
|
||||||
|
|
||||||
class ChooseFormatDialog(QDialog, Ui_ChooseFormatDialog):
|
class ChooseFormatDialog(QDialog, Ui_ChooseFormatDialog):
|
||||||
|
|
||||||
def __init__(self, window, msg, formats):
|
def __init__(self, window, msg, formats):
|
||||||
QDialog.__init__(self, window)
|
QDialog.__init__(self, window)
|
||||||
Ui_ChooseFormatDialog.__init__(self)
|
Ui_ChooseFormatDialog.__init__(self)
|
||||||
self.setupUi(self)
|
self.setupUi(self)
|
||||||
self.connect(self.formats, SIGNAL('activated(QModelIndex)'), lambda i: self.accept())
|
self.connect(self.formats, SIGNAL('activated(QModelIndex)'), lambda i: self.accept())
|
||||||
|
|
||||||
self.msg.setText(msg)
|
self.msg.setText(msg)
|
||||||
for format in formats:
|
for format in formats:
|
||||||
self.formats.addItem(QListWidgetItem(file_icon_provider().icon_from_ext(format.lower()),
|
self.formats.addItem(QListWidgetItem(file_icon_provider().icon_from_ext(format.lower()),
|
||||||
format.upper()))
|
format.upper()))
|
||||||
self._formats = formats
|
self._formats = formats
|
||||||
self.formats.setCurrentRow(0)
|
self.formats.setCurrentRow(0)
|
||||||
|
|
||||||
def format(self):
|
def format(self):
|
||||||
return self._formats[self.formats.currentRow()]
|
return self._formats[self.formats.currentRow()]
|
||||||
|
|
||||||
|
|
@ -5,7 +5,7 @@ from PyQt4.QtGui import QDialog
|
|||||||
from calibre.gui2.dialogs.conversion_error_ui import Ui_ConversionErrorDialog
|
from calibre.gui2.dialogs.conversion_error_ui import Ui_ConversionErrorDialog
|
||||||
|
|
||||||
class ConversionErrorDialog(QDialog, Ui_ConversionErrorDialog):
|
class ConversionErrorDialog(QDialog, Ui_ConversionErrorDialog):
|
||||||
|
|
||||||
def __init__(self, window, title, html, show=False):
|
def __init__(self, window, title, html, show=False):
|
||||||
QDialog.__init__(self, window)
|
QDialog.__init__(self, window)
|
||||||
Ui_ConversionErrorDialog.__init__(self)
|
Ui_ConversionErrorDialog.__init__(self)
|
||||||
@ -14,7 +14,7 @@ class ConversionErrorDialog(QDialog, Ui_ConversionErrorDialog):
|
|||||||
self.set_message(html)
|
self.set_message(html)
|
||||||
if show:
|
if show:
|
||||||
self.show()
|
self.show()
|
||||||
|
|
||||||
def set_message(self, html):
|
def set_message(self, html):
|
||||||
self.text.setHtml('<html><body>%s</body></html'%(html,))
|
self.text.setHtml('<html><body>%s</body></html'%(html,))
|
||||||
|
|
||||||
|
@ -5,20 +5,20 @@ from PyQt4.QtGui import QGraphicsView
|
|||||||
from PyQt4.QtCore import QSize
|
from PyQt4.QtCore import QSize
|
||||||
|
|
||||||
class BookView(QGraphicsView):
|
class BookView(QGraphicsView):
|
||||||
|
|
||||||
MINIMUM_SIZE = QSize(400, 500)
|
MINIMUM_SIZE = QSize(400, 500)
|
||||||
|
|
||||||
def __init__(self, *args):
|
def __init__(self, *args):
|
||||||
QGraphicsView.__init__(self, *args)
|
QGraphicsView.__init__(self, *args)
|
||||||
self.preferred_size = self.MINIMUM_SIZE
|
self.preferred_size = self.MINIMUM_SIZE
|
||||||
|
|
||||||
def minimumSizeHint(self):
|
def minimumSizeHint(self):
|
||||||
return self.MINIMUM_SIZE
|
return self.MINIMUM_SIZE
|
||||||
|
|
||||||
def sizeHint(self):
|
def sizeHint(self):
|
||||||
return self.preferred_size
|
return self.preferred_size
|
||||||
|
|
||||||
def resize_for(self, width, height):
|
def resize_for(self, width, height):
|
||||||
self.preferred_size = QSize(width, height)
|
self.preferred_size = QSize(width, height)
|
||||||
|
|
||||||
|
|
||||||
|
@ -8,7 +8,7 @@ __docformat__ = 'restructuredtext en'
|
|||||||
import os, math, re
|
import os, math, re
|
||||||
from PyQt4.Qt import QWidget, QSize, QSizePolicy, QUrl, SIGNAL, Qt, QTimer, \
|
from PyQt4.Qt import QWidget, QSize, QSizePolicy, QUrl, SIGNAL, Qt, QTimer, \
|
||||||
QPainter, QPalette, QBrush, QFontDatabase, QDialog, \
|
QPainter, QPalette, QBrush, QFontDatabase, QDialog, \
|
||||||
QByteArray, QColor, QWheelEvent, QPoint, QImage, QRegion, \
|
QByteArray, QColor, QPoint, QImage, QRegion, \
|
||||||
QFont, QObject, QApplication, pyqtSignature
|
QFont, QObject, QApplication, pyqtSignature
|
||||||
from PyQt4.QtWebKit import QWebPage, QWebView, QWebSettings
|
from PyQt4.QtWebKit import QWebPage, QWebView, QWebSettings
|
||||||
|
|
||||||
|
@ -4,17 +4,14 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
|
|
||||||
|
|
||||||
import os, sys, traceback, urlparse
|
import os, sys, urlparse
|
||||||
|
|
||||||
from BeautifulSoup import BeautifulSoup, Tag
|
from BeautifulSoup import BeautifulSoup, Tag
|
||||||
|
|
||||||
from calibre.ebooks.oeb.iterator import EbookIterator
|
|
||||||
from calibre.ptempfile import TemporaryDirectory
|
|
||||||
|
|
||||||
from PyQt4 import QtCore
|
from PyQt4 import QtCore
|
||||||
from PyQt4.Qt import QUrl, QEventLoop, SIGNAL, QObject, QApplication, Qt, \
|
from PyQt4.Qt import QUrl, QEventLoop, SIGNAL, QObject, QApplication, Qt, \
|
||||||
QPrinter, QPrintPreviewDialog, QPrintDialog, QDialog, QMetaObject, Q_ARG
|
QPrinter, QPrintPreviewDialog, QPrintDialog, QDialog, QMetaObject, Q_ARG
|
||||||
from PyQt4 import QtCore
|
|
||||||
from PyQt4.QtWebKit import QWebView
|
from PyQt4.QtWebKit import QWebView
|
||||||
|
|
||||||
PRINTCSS = 'body{width:100%;margin:0;padding:0;font-family:Arial;color:#000;background:none;font-size:12pt;text-align:left;}h1,h2,h3,h4,h5,h6{font-family:Helvetica;}h1{font-size:19pt;}h2{font-size:17pt;}h3{font-size:15pt;}h4,h5,h6{font-size:12pt;}pre,code,samp{font:10ptCourier,monospace;white-space:pre-wrap;page-break-inside:avoid;}blockquote{margin:1.3em;padding:1em;font-size:10pt;}hr{background-color:#ccc;}aimg{border:none;}a:link,a:visited{background:transparent;font-weight:700;text-decoration:underline;color:#333;}a:link:after,a{color:#000;}table{margin:1px;text-align:left;}th{border-bottom:1pxsolid#333;font-weight:bold;}td{border-bottom:1pxsolid#333;}th,td{padding:4px10px4px0;}tfoot{font-style:italic;}caption{background:#fff;margin-bottom:2em;text-align:left;}thead{display:table-header-group;}tr{page-break-inside:avoid;}#header,.header,#footer,.footer,#navbar,.navbar,#navigation,.navigation,#rightSideBar,.rightSideBar,#leftSideBar,.leftSideBar{display:none;}'
|
PRINTCSS = 'body{width:100%;margin:0;padding:0;font-family:Arial;color:#000;background:none;font-size:12pt;text-align:left;}h1,h2,h3,h4,h5,h6{font-family:Helvetica;}h1{font-size:19pt;}h2{font-size:17pt;}h3{font-size:15pt;}h4,h5,h6{font-size:12pt;}pre,code,samp{font:10ptCourier,monospace;white-space:pre-wrap;page-break-inside:avoid;}blockquote{margin:1.3em;padding:1em;font-size:10pt;}hr{background-color:#ccc;}aimg{border:none;}a:link,a:visited{background:transparent;font-weight:700;text-decoration:underline;color:#333;}a:link:after,a{color:#000;}table{margin:1px;text-align:left;}th{border-bottom:1pxsolid#333;font-weight:bold;}td{border-bottom:1pxsolid#333;}th,td{padding:4px10px4px0;}tfoot{font-style:italic;}caption{background:#fff;margin-bottom:2em;text-align:left;}thead{display:table-header-group;}tr{page-break-inside:avoid;}#header,.header,#footer,.footer,#navbar,.navbar,#navigation,.navigation,#rightSideBar,.rightSideBar,#leftSideBar,.leftSideBar{display:none;}'
|
||||||
@ -31,18 +28,18 @@ class Printing(QObject):
|
|||||||
self.connect(self.view, SIGNAL('loadFinished(bool)'), self.print_preview)
|
self.connect(self.view, SIGNAL('loadFinished(bool)'), self.print_preview)
|
||||||
else:
|
else:
|
||||||
self.connect(self.view, SIGNAL('loadFinished(bool)'), self.print_book)
|
self.connect(self.view, SIGNAL('loadFinished(bool)'), self.print_book)
|
||||||
|
|
||||||
self.process_content(spine)
|
self.process_content(spine)
|
||||||
|
|
||||||
def process_content(self, spine):
|
def process_content(self, spine):
|
||||||
content = ''
|
content = ''
|
||||||
|
|
||||||
for path in spine:
|
for path in spine:
|
||||||
raw = self.raw_content(path)
|
raw = self.raw_content(path)
|
||||||
content += self.parsed_content(raw, path)
|
content += self.parsed_content(raw, path)
|
||||||
|
|
||||||
refined_content = self.refine_content(content)
|
refined_content = self.refine_content(content)
|
||||||
|
|
||||||
base = os.path.splitdrive(spine[0])[0]
|
base = os.path.splitdrive(spine[0])[0]
|
||||||
base = base if base != '' else '/'
|
base = base if base != '' else '/'
|
||||||
|
|
||||||
@ -52,7 +49,7 @@ class Printing(QObject):
|
|||||||
@QtCore.pyqtSignature('load_content(QString, QString)')
|
@QtCore.pyqtSignature('load_content(QString, QString)')
|
||||||
def load_content(self, content, base):
|
def load_content(self, content, base):
|
||||||
self.view.setHtml(content, QUrl(base))
|
self.view.setHtml(content, QUrl(base))
|
||||||
|
|
||||||
def raw_content(self, path):
|
def raw_content(self, path):
|
||||||
return open(path, 'rb').read().decode(path.encoding)
|
return open(path, 'rb').read().decode(path.encoding)
|
||||||
|
|
||||||
@ -64,11 +61,11 @@ class Printing(QObject):
|
|||||||
styles = dom_tree.findAll('style')
|
styles = dom_tree.findAll('style')
|
||||||
for s in styles:
|
for s in styles:
|
||||||
s.extract()
|
s.extract()
|
||||||
|
|
||||||
scripts = dom_tree.findAll('script')
|
scripts = dom_tree.findAll('script')
|
||||||
for s in scripts:
|
for s in scripts:
|
||||||
s.extract()
|
s.extract()
|
||||||
|
|
||||||
# Convert all relative links to absolute paths.
|
# Convert all relative links to absolute paths.
|
||||||
links = dom_tree.findAll(src=True)
|
links = dom_tree.findAll(src=True)
|
||||||
for s in links:
|
for s in links:
|
||||||
@ -85,40 +82,40 @@ class Printing(QObject):
|
|||||||
# Adds the print css.
|
# Adds the print css.
|
||||||
def refine_content(self, content):
|
def refine_content(self, content):
|
||||||
dom_tree = BeautifulSoup('<html><head></head><body>%s</body></html>' % content)
|
dom_tree = BeautifulSoup('<html><head></head><body>%s</body></html>' % content)
|
||||||
|
|
||||||
css = dom_tree.findAll('link')
|
css = dom_tree.findAll('link')
|
||||||
for c in css:
|
for c in css:
|
||||||
c.extract()
|
c.extract()
|
||||||
|
|
||||||
print_css = Tag(BeautifulSoup(), 'style', [('type', 'text/css'), ('title', 'override_css')])
|
print_css = Tag(BeautifulSoup(), 'style', [('type', 'text/css'), ('title', 'override_css')])
|
||||||
print_css.insert(0, PRINTCSS)
|
print_css.insert(0, PRINTCSS)
|
||||||
dom_tree.findAll('head')[0].insert(0, print_css)
|
dom_tree.findAll('head')[0].insert(0, print_css)
|
||||||
|
|
||||||
return unicode(dom_tree)
|
return unicode(dom_tree)
|
||||||
|
|
||||||
def print_preview(self, ok):
|
def print_preview(self, ok):
|
||||||
printer = QPrinter(QPrinter.HighResolution)
|
printer = QPrinter(QPrinter.HighResolution)
|
||||||
printer.setPageMargins(1, 1, 1, 1, QPrinter.Inch)
|
printer.setPageMargins(1, 1, 1, 1, QPrinter.Inch)
|
||||||
|
|
||||||
previewDialog = QPrintPreviewDialog(printer)
|
previewDialog = QPrintPreviewDialog(printer)
|
||||||
|
|
||||||
self.connect(previewDialog, SIGNAL('paintRequested(QPrinter *)'), self.view.print_)
|
self.connect(previewDialog, SIGNAL('paintRequested(QPrinter *)'), self.view.print_)
|
||||||
previewDialog.exec_()
|
previewDialog.exec_()
|
||||||
self.disconnect(previewDialog, SIGNAL('paintRequested(QPrinter *)'), self.view.print_)
|
self.disconnect(previewDialog, SIGNAL('paintRequested(QPrinter *)'), self.view.print_)
|
||||||
|
|
||||||
self.loop.quit()
|
self.loop.quit()
|
||||||
|
|
||||||
def print_book(self, ok):
|
def print_book(self, ok):
|
||||||
printer = QPrinter(QPrinter.HighResolution)
|
printer = QPrinter(QPrinter.HighResolution)
|
||||||
printer.setPageMargins(1, 1, 1, 1, QPrinter.Inch)
|
printer.setPageMargins(1, 1, 1, 1, QPrinter.Inch)
|
||||||
|
|
||||||
printDialog = QPrintDialog(printer)
|
printDialog = QPrintDialog(printer)
|
||||||
printDialog.setWindowTitle(_("Print eBook"))
|
printDialog.setWindowTitle(_("Print eBook"))
|
||||||
|
|
||||||
printDialog.exec_()
|
printDialog.exec_()
|
||||||
if printDialog.result() == QDialog.Accepted:
|
if printDialog.result() == QDialog.Accepted:
|
||||||
self.view.print_(printer)
|
self.view.print_(printer)
|
||||||
|
|
||||||
self.loop.quit()
|
self.loop.quit()
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
@ -18,7 +18,7 @@ sys.path.append(os.path.abspath('../../../'))
|
|||||||
sys.path.append(os.path.abspath('.'))
|
sys.path.append(os.path.abspath('.'))
|
||||||
from calibre import __appname__, __version__
|
from calibre import __appname__, __version__
|
||||||
import custom
|
import custom
|
||||||
|
custom
|
||||||
# General configuration
|
# General configuration
|
||||||
# ---------------------
|
# ---------------------
|
||||||
|
|
||||||
|
@ -1,970 +0,0 @@
|
|||||||
""" path.py - An object representing a path to a file or directory.
|
|
||||||
|
|
||||||
Example:
|
|
||||||
|
|
||||||
from path import path
|
|
||||||
d = path('/home/guido/bin')
|
|
||||||
for f in d.files('*.py'):
|
|
||||||
f.chmod(0755)
|
|
||||||
|
|
||||||
This module requires Python 2.2 or later.
|
|
||||||
|
|
||||||
|
|
||||||
URL: http://www.jorendorff.com/articles/python/path
|
|
||||||
Author: Jason Orendorff <jason.orendorff\x40gmail\x2ecom> (and others - see the url!)
|
|
||||||
Date: 9 Mar 2007
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
# TODO
|
|
||||||
# - Tree-walking functions don't avoid symlink loops. Matt Harrison
|
|
||||||
# sent me a patch for this.
|
|
||||||
# - Bug in write_text(). It doesn't support Universal newline mode.
|
|
||||||
# - Better error message in listdir() when self isn't a
|
|
||||||
# directory. (On Windows, the error message really sucks.)
|
|
||||||
# - Make sure everything has a good docstring.
|
|
||||||
# - Add methods for regex find and replace.
|
|
||||||
# - guess_content_type() method?
|
|
||||||
# - Perhaps support arguments to touch().
|
|
||||||
|
|
||||||
from __future__ import generators
|
|
||||||
|
|
||||||
import sys, warnings, os, fnmatch, glob, shutil, codecs, hashlib
|
|
||||||
|
|
||||||
__version__ = '2.2'
|
|
||||||
__all__ = ['path']
|
|
||||||
|
|
||||||
# Platform-specific support for path.owner
|
|
||||||
if os.name == 'nt':
|
|
||||||
try:
|
|
||||||
import win32security
|
|
||||||
except ImportError:
|
|
||||||
win32security = None
|
|
||||||
else:
|
|
||||||
try:
|
|
||||||
import pwd
|
|
||||||
except ImportError:
|
|
||||||
pwd = None
|
|
||||||
|
|
||||||
# Pre-2.3 support. Are unicode filenames supported?
|
|
||||||
_base = str
|
|
||||||
_getcwd = os.getcwd
|
|
||||||
try:
|
|
||||||
if os.path.supports_unicode_filenames:
|
|
||||||
_base = unicode
|
|
||||||
_getcwd = os.getcwdu
|
|
||||||
except AttributeError:
|
|
||||||
pass
|
|
||||||
|
|
||||||
# Pre-2.3 workaround for booleans
|
|
||||||
try:
|
|
||||||
True, False
|
|
||||||
except NameError:
|
|
||||||
True, False = 1, 0
|
|
||||||
|
|
||||||
# Pre-2.3 workaround for basestring.
|
|
||||||
try:
|
|
||||||
basestring
|
|
||||||
except NameError:
|
|
||||||
basestring = (str, unicode)
|
|
||||||
|
|
||||||
# Universal newline support
|
|
||||||
_textmode = 'r'
|
|
||||||
if hasattr(file, 'newlines'):
|
|
||||||
_textmode = 'U'
|
|
||||||
|
|
||||||
|
|
||||||
class TreeWalkWarning(Warning):
|
|
||||||
pass
|
|
||||||
|
|
||||||
class path(_base):
|
|
||||||
""" Represents a filesystem path.
|
|
||||||
|
|
||||||
For documentation on individual methods, consult their
|
|
||||||
counterparts in os.path.
|
|
||||||
"""
|
|
||||||
|
|
||||||
# --- Special Python methods.
|
|
||||||
|
|
||||||
def __repr__(self):
|
|
||||||
return 'path(%s)' % _base.__repr__(self)
|
|
||||||
|
|
||||||
# Adding a path and a string yields a path.
|
|
||||||
def __add__(self, more):
|
|
||||||
try:
|
|
||||||
resultStr = _base.__add__(self, more)
|
|
||||||
except TypeError: #Python bug
|
|
||||||
resultStr = NotImplemented
|
|
||||||
if resultStr is NotImplemented:
|
|
||||||
return resultStr
|
|
||||||
return self.__class__(resultStr)
|
|
||||||
|
|
||||||
def __radd__(self, other):
|
|
||||||
if isinstance(other, basestring):
|
|
||||||
return self.__class__(other.__add__(self))
|
|
||||||
else:
|
|
||||||
return NotImplemented
|
|
||||||
|
|
||||||
# The / operator joins paths.
|
|
||||||
def __div__(self, rel):
|
|
||||||
""" fp.__div__(rel) == fp / rel == fp.joinpath(rel)
|
|
||||||
|
|
||||||
Join two path components, adding a separator character if
|
|
||||||
needed.
|
|
||||||
"""
|
|
||||||
return self.__class__(os.path.join(self, rel))
|
|
||||||
|
|
||||||
# Make the / operator work even when true division is enabled.
|
|
||||||
__truediv__ = __div__
|
|
||||||
|
|
||||||
def getcwd(cls):
|
|
||||||
""" Return the current working directory as a path object. """
|
|
||||||
return cls(_getcwd())
|
|
||||||
getcwd = classmethod(getcwd)
|
|
||||||
|
|
||||||
|
|
||||||
# --- Operations on path strings.
|
|
||||||
|
|
||||||
isabs = os.path.isabs
|
|
||||||
def abspath(self): return self.__class__(os.path.abspath(self))
|
|
||||||
def normcase(self): return self.__class__(os.path.normcase(self))
|
|
||||||
def normpath(self): return self.__class__(os.path.normpath(self))
|
|
||||||
def realpath(self): return self.__class__(os.path.realpath(self))
|
|
||||||
def expanduser(self): return self.__class__(os.path.expanduser(self))
|
|
||||||
def expandvars(self): return self.__class__(os.path.expandvars(self))
|
|
||||||
def dirname(self): return self.__class__(os.path.dirname(self))
|
|
||||||
basename = os.path.basename
|
|
||||||
|
|
||||||
def expand(self):
|
|
||||||
""" Clean up a filename by calling expandvars(),
|
|
||||||
expanduser(), and normpath() on it.
|
|
||||||
|
|
||||||
This is commonly everything needed to clean up a filename
|
|
||||||
read from a configuration file, for example.
|
|
||||||
"""
|
|
||||||
return self.expandvars().expanduser().normpath()
|
|
||||||
|
|
||||||
def _get_namebase(self):
|
|
||||||
base, ext = os.path.splitext(self.name)
|
|
||||||
return base
|
|
||||||
|
|
||||||
def _get_ext(self):
|
|
||||||
f, ext = os.path.splitext(_base(self))
|
|
||||||
return ext
|
|
||||||
|
|
||||||
def _get_drive(self):
|
|
||||||
drive, r = os.path.splitdrive(self)
|
|
||||||
return self.__class__(drive)
|
|
||||||
|
|
||||||
parent = property(
|
|
||||||
dirname, None, None,
|
|
||||||
""" This path's parent directory, as a new path object.
|
|
||||||
|
|
||||||
For example, path('/usr/local/lib/libpython.so').parent == path('/usr/local/lib')
|
|
||||||
""")
|
|
||||||
|
|
||||||
name = property(
|
|
||||||
basename, None, None,
|
|
||||||
""" The name of this file or directory without the full path.
|
|
||||||
|
|
||||||
For example, path('/usr/local/lib/libpython.so').name == 'libpython.so'
|
|
||||||
""")
|
|
||||||
|
|
||||||
namebase = property(
|
|
||||||
_get_namebase, None, None,
|
|
||||||
""" The same as path.name, but with one file extension stripped off.
|
|
||||||
|
|
||||||
For example, path('/home/guido/python.tar.gz').name == 'python.tar.gz',
|
|
||||||
but path('/home/guido/python.tar.gz').namebase == 'python.tar'
|
|
||||||
""")
|
|
||||||
|
|
||||||
ext = property(
|
|
||||||
_get_ext, None, None,
|
|
||||||
""" The file extension, for example '.py'. """)
|
|
||||||
|
|
||||||
drive = property(
|
|
||||||
_get_drive, None, None,
|
|
||||||
""" The drive specifier, for example 'C:'.
|
|
||||||
This is always empty on systems that don't use drive specifiers.
|
|
||||||
""")
|
|
||||||
|
|
||||||
def splitpath(self):
|
|
||||||
""" p.splitpath() -> Return (p.parent, p.name). """
|
|
||||||
parent, child = os.path.split(self)
|
|
||||||
return self.__class__(parent), child
|
|
||||||
|
|
||||||
def splitdrive(self):
|
|
||||||
""" p.splitdrive() -> Return (p.drive, <the rest of p>).
|
|
||||||
|
|
||||||
Split the drive specifier from this path. If there is
|
|
||||||
no drive specifier, p.drive is empty, so the return value
|
|
||||||
is simply (path(''), p). This is always the case on Unix.
|
|
||||||
"""
|
|
||||||
drive, rel = os.path.splitdrive(self)
|
|
||||||
return self.__class__(drive), rel
|
|
||||||
|
|
||||||
def splitext(self):
|
|
||||||
""" p.splitext() -> Return (p.stripext(), p.ext).
|
|
||||||
|
|
||||||
Split the filename extension from this path and return
|
|
||||||
the two parts. Either part may be empty.
|
|
||||||
|
|
||||||
The extension is everything from '.' to the end of the
|
|
||||||
last path segment. This has the property that if
|
|
||||||
(a, b) == p.splitext(), then a + b == p.
|
|
||||||
"""
|
|
||||||
filename, ext = os.path.splitext(self)
|
|
||||||
return self.__class__(filename), ext
|
|
||||||
|
|
||||||
def stripext(self):
|
|
||||||
""" p.stripext() -> Remove one file extension from the path.
|
|
||||||
|
|
||||||
For example, path('/home/guido/python.tar.gz').stripext()
|
|
||||||
returns path('/home/guido/python.tar').
|
|
||||||
"""
|
|
||||||
return self.splitext()[0]
|
|
||||||
|
|
||||||
if hasattr(os.path, 'splitunc'):
|
|
||||||
def splitunc(self):
|
|
||||||
unc, rest = os.path.splitunc(self)
|
|
||||||
return self.__class__(unc), rest
|
|
||||||
|
|
||||||
def _get_uncshare(self):
|
|
||||||
unc, r = os.path.splitunc(self)
|
|
||||||
return self.__class__(unc)
|
|
||||||
|
|
||||||
uncshare = property(
|
|
||||||
_get_uncshare, None, None,
|
|
||||||
""" The UNC mount point for this path.
|
|
||||||
This is empty for paths on local drives. """)
|
|
||||||
|
|
||||||
def joinpath(self, *args):
|
|
||||||
""" Join two or more path components, adding a separator
|
|
||||||
character (os.sep) if needed. Returns a new path
|
|
||||||
object.
|
|
||||||
"""
|
|
||||||
return self.__class__(os.path.join(self, *args))
|
|
||||||
|
|
||||||
def splitall(self):
|
|
||||||
r""" Return a list of the path components in this path.
|
|
||||||
|
|
||||||
The first item in the list will be a path. Its value will be
|
|
||||||
either os.curdir, os.pardir, empty, or the root directory of
|
|
||||||
this path (for example, '/' or 'C:\\'). The other items in
|
|
||||||
the list will be strings.
|
|
||||||
|
|
||||||
path.path.joinpath(*result) will yield the original path.
|
|
||||||
"""
|
|
||||||
parts = []
|
|
||||||
loc = self
|
|
||||||
while loc != os.curdir and loc != os.pardir:
|
|
||||||
prev = loc
|
|
||||||
loc, child = prev.splitpath()
|
|
||||||
if loc == prev:
|
|
||||||
break
|
|
||||||
parts.append(child)
|
|
||||||
parts.append(loc)
|
|
||||||
parts.reverse()
|
|
||||||
return parts
|
|
||||||
|
|
||||||
def relpath(self):
|
|
||||||
""" Return this path as a relative path,
|
|
||||||
based from the current working directory.
|
|
||||||
"""
|
|
||||||
cwd = self.__class__(os.getcwd())
|
|
||||||
return cwd.relpathto(self)
|
|
||||||
|
|
||||||
def relpathto(self, dest):
|
|
||||||
""" Return a relative path from self to dest.
|
|
||||||
|
|
||||||
If there is no relative path from self to dest, for example if
|
|
||||||
they reside on different drives in Windows, then this returns
|
|
||||||
dest.abspath().
|
|
||||||
"""
|
|
||||||
origin = self.abspath()
|
|
||||||
dest = self.__class__(dest).abspath()
|
|
||||||
|
|
||||||
orig_list = origin.normcase().splitall()
|
|
||||||
# Don't normcase dest! We want to preserve the case.
|
|
||||||
dest_list = dest.splitall()
|
|
||||||
|
|
||||||
if orig_list[0] != os.path.normcase(dest_list[0]):
|
|
||||||
# Can't get here from there.
|
|
||||||
return dest
|
|
||||||
|
|
||||||
# Find the location where the two paths start to differ.
|
|
||||||
i = 0
|
|
||||||
for start_seg, dest_seg in zip(orig_list, dest_list):
|
|
||||||
if start_seg != os.path.normcase(dest_seg):
|
|
||||||
break
|
|
||||||
i += 1
|
|
||||||
|
|
||||||
# Now i is the point where the two paths diverge.
|
|
||||||
# Need a certain number of "os.pardir"s to work up
|
|
||||||
# from the origin to the point of divergence.
|
|
||||||
segments = [os.pardir] * (len(orig_list) - i)
|
|
||||||
# Need to add the diverging part of dest_list.
|
|
||||||
segments += dest_list[i:]
|
|
||||||
if len(segments) == 0:
|
|
||||||
# If they happen to be identical, use os.curdir.
|
|
||||||
relpath = os.curdir
|
|
||||||
else:
|
|
||||||
relpath = os.path.join(*segments)
|
|
||||||
return self.__class__(relpath)
|
|
||||||
|
|
||||||
# --- Listing, searching, walking, and matching
|
|
||||||
|
|
||||||
def listdir(self, pattern=None):
    """ D.listdir() -> List of items in this directory.

    Every element of the result is built as 'self / name'.  See
    D.files() and D.dirs() for listings restricted to files or to
    subdirectories.

    If 'pattern' is given, only names accepted by fnmatch.filter()
    for that pattern are included.
    """
    if pattern is None:
        selected = os.listdir(self)
    else:
        selected = fnmatch.filter(os.listdir(self), pattern)
    return [self / name for name in selected]
|
|
||||||
|
|
||||||
def dirs(self, pattern=None):
    """ D.dirs() -> List of this directory's subdirectories.

    Returns the items of self.listdir(pattern) for which isdir()
    is true.  Subdirectories are not entered (path.walkdirs does
    that).

    'pattern' is forwarded to listdir(), e.g. d.dirs('build-*').
    """
    subdirs = []
    for entry in self.listdir(pattern):
        if entry.isdir():
            subdirs.append(entry)
    return subdirs
|
|
||||||
|
|
||||||
def files(self, pattern=None):
    """ D.files() -> List of the files in this directory.

    Returns the items of self.listdir(pattern) for which isfile()
    is true.  Subdirectories are not entered (see path.walkfiles).

    'pattern' is forwarded to listdir(), e.g. d.files('*.pyc').
    """
    plain_files = []
    for entry in self.listdir(pattern):
        if entry.isfile():
            plain_files.append(entry)
    return plain_files
|
|
||||||
|
|
||||||
def walk(self, pattern=None, errors='strict'):
    """ D.walk() -> iterator over files and subdirs, recursively.

    Yields every child of this directory and of its descendants,
    depth-first, each directory being yielded just before its own
    children.  When 'pattern' is given only children whose
    fnmatch(pattern) is true are yielded, but every directory is
    still descended into.

    errors= selects what happens when listing a directory or
    testing a child fails: 'strict' (default) re-raises, 'warn'
    reports through warnings.warn() and carries on, 'ignore'
    carries on silently.  Any other value raises ValueError.
    """
    if errors not in ('strict', 'warn', 'ignore'):
        raise ValueError("invalid errors parameter")

    try:
        children = self.listdir()
    except Exception:
        if errors == 'strict':
            raise
        if errors == 'warn':
            warnings.warn(
                "Unable to list directory '%s': %s"
                % (self, sys.exc_info()[1]),
                TreeWalkWarning)
        return

    for entry in children:
        if pattern is None or entry.fnmatch(pattern):
            yield entry

        try:
            descend = entry.isdir()
        except Exception:
            if errors == 'strict':
                raise
            if errors == 'warn':
                warnings.warn(
                    "Unable to access '%s': %s"
                    % (entry, sys.exc_info()[1]),
                    TreeWalkWarning)
            # An entry that cannot be examined is not descended into.
            descend = False

        if descend:
            for item in entry.walk(pattern, errors):
                yield item
|
|
||||||
|
|
||||||
def walkdirs(self, pattern=None, errors='strict'):
    """ D.walkdirs() -> iterator over subdirs, recursively.

    When 'pattern' is given, only directories whose fnmatch(pattern)
    is true are yielded; e.g. mydir.walkdirs('*test') yields only
    directories with names ending in 'test'.  Non-matching
    directories are still descended into.

    errors= selects what happens when self.dirs() fails: 'strict'
    (default) re-raises, 'warn' reports through warnings.warn()
    and stops, 'ignore' stops silently.  Any other value raises
    ValueError.
    """
    if errors not in ('strict', 'warn', 'ignore'):
        raise ValueError("invalid errors parameter")

    try:
        subdirs = self.dirs()
    except Exception:
        if errors == 'strict':
            raise
        if errors == 'warn':
            warnings.warn(
                "Unable to list directory '%s': %s"
                % (self, sys.exc_info()[1]),
                TreeWalkWarning)
        return

    for subdir in subdirs:
        if pattern is None or subdir.fnmatch(pattern):
            yield subdir
        for nested in subdir.walkdirs(pattern, errors):
            yield nested
|
|
||||||
|
|
||||||
def walkfiles(self, pattern=None, errors='strict'):
    """ D.walkfiles() -> iterator over files in D, recursively.

    The optional argument, pattern, limits the results to files
    whose fnmatch(pattern) is true.  For example,
    mydir.walkfiles('*.tmp') yields only files with the .tmp
    extension.

    errors= selects what happens when listing a directory or
    examining a child fails: 'strict' (default) re-raises, 'warn'
    reports through warnings.warn() and skips the item, 'ignore'
    skips it silently.  Any other value raises ValueError.
    """
    if errors not in ('strict', 'warn', 'ignore'):
        raise ValueError("invalid errors parameter")

    try:
        childList = self.listdir()
    except Exception:
        if errors == 'ignore':
            return
        elif errors == 'warn':
            warnings.warn(
                "Unable to list directory '%s': %s"
                % (self, sys.exc_info()[1]),
                TreeWalkWarning)
            return
        else:
            raise

    for child in childList:
        try:
            isfile = child.isfile()
            # isdir() is only consulted for things that are not files.
            isdir = not isfile and child.isdir()
        except Exception:
            # 'except Exception' (as in walk()) rather than a bare
            # except, so KeyboardInterrupt/SystemExit are never
            # swallowed by errors='ignore' or errors='warn'.
            if errors == 'ignore':
                continue
            elif errors == 'warn':
                # Name the child that could not be examined, not the
                # directory that is being walked.
                warnings.warn(
                    "Unable to access '%s': %s"
                    % (child, sys.exc_info()[1]),
                    TreeWalkWarning)
                continue
            else:
                raise

        if isfile:
            if pattern is None or child.fnmatch(pattern):
                yield child
        elif isdir:
            for f in child.walkfiles(pattern, errors):
                yield f
|
|
||||||
|
|
||||||
def fnmatch(self, pattern):
    """ Test self.name against a wildcard pattern.

    pattern - A filename pattern with wildcards, such as '*.py'.

    Returns the result of fnmatch.fnmatch(self.name, pattern).
    """
    matched = fnmatch.fnmatch(self.name, pattern)
    return matched
|
|
||||||
|
|
||||||
def glob(self, pattern):
    """ Return a list of path objects that match the pattern.

    pattern - a path relative to this directory, with wildcards.

    For example, path('/users').glob('*/bin/*') returns a list
    of all the files users have in their bin directories.
    """
    path_type = self.__class__
    matches = glob.glob(_base(self / pattern))
    return [path_type(match) for match in matches]
|
|
||||||
|
|
||||||
|
|
||||||
# --- Reading or writing an entire file at once.
|
|
||||||
|
|
||||||
def open(self, mode='r'):
    """ Open this file with the given mode and return the resulting
    file object.
    """
    handle = file(self, mode)
    return handle
|
|
||||||
|
|
||||||
def bytes(self):
    """ Read the whole file in binary mode ('rb') and return its
    contents as a string.  The file is closed before returning.
    """
    handle = self.open('rb')
    try:
        data = handle.read()
    finally:
        handle.close()
    return data
|
|
||||||
|
|
||||||
def write_bytes(self, bytes, append=False):
    """ Open this file and write the given bytes to it.

    The file is opened with mode 'wb' (replacing any existing
    content) unless append is true, in which case mode 'ab' is
    used: p.write_bytes(bytes, append=True).
    """
    mode = 'ab' if append else 'wb'
    handle = self.open(mode)
    try:
        handle.write(bytes)
    finally:
        handle.close()
|
|
||||||
|
|
||||||
def text(self, encoding=None, errors='strict'):
    r""" Open this file, read it in, return the content as a string.

    Without an encoding the file is opened through self.open() with
    the module's text mode and the raw 8-bit content is returned.

    Optional arguments:

    encoding - The Unicode encoding (or character set) of the
        file.  If present, the file is decoded through codecs.open()
        and returned as a unicode object in which '\r\n', '\r\x85',
        '\r', '\x85' and '\u2028' have been replaced by '\n';
        otherwise it is returned as an 8-bit str.
    errors - How to handle Unicode errors; see help(str.decode)
        for the options.  Default is 'strict'.
    """
    if encoding is not None:
        # Unicode.  codecs.open has no 'U' mode, so the end-of-line
        # sequences are normalised by hand after reading.
        handle = codecs.open(self, 'r', encoding, errors)
        try:
            content = handle.read()
        finally:
            handle.close()
        # Order matters: two-character sequences come before their
        # one-character prefixes.
        for eol in (u'\r\n', u'\r\x85', u'\r', u'\x85', u'\u2028'):
            content = content.replace(eol, u'\n')
        return content

    # 8-bit
    handle = self.open(_textmode)
    try:
        return handle.read()
    finally:
        handle.close()
|
|
||||||
|
|
||||||
def write_text(self, text, encoding=None, errors='strict', linesep=os.linesep, append=False):
    r""" Write the given text to this file.

    The default behavior is to overwrite any existing file;
    to append instead, use the 'append=True' keyword argument.

    There are two differences between path.write_text() and
    path.write_bytes(): newline handling and Unicode handling.
    See below.

    Parameters:

      - text - str/unicode - The text to be written.

      - encoding - str - The Unicode encoding that will be used.
        This is ignored if 'text' isn't a Unicode string.

      - errors - str - How to handle Unicode encoding errors.
        Default is 'strict'.  See help(unicode.encode) for the
        options.  This is ignored if 'text' isn't a Unicode
        string.

      - linesep - keyword argument - str/unicode - The sequence of
        characters to be used to mark end-of-line.  The default is
        os.linesep.  You can also specify None; this means to
        leave all newlines as they are in 'text'.

      - append - keyword argument - bool - Specifies what to do if
        the file already exists (True: append to the end of it;
        False: overwrite it.)  The default is False.


    --- Newline handling.

    write_text() converts all standard end-of-line sequences
    ('\n', '\r', and '\r\n') to your platform's default end-of-line
    sequence (see os.linesep; on Windows, for example, the
    end-of-line marker is '\r\n').

    If you don't like your platform's default, you can override it
    using the 'linesep=' keyword argument.  If you specifically want
    write_text() to preserve the newlines as-is, use 'linesep=None'.

    This applies to Unicode text the same as to 8-bit text, except
    there are three additional standard Unicode end-of-line sequences:
    u'\x85', u'\r\x85', and u'\u2028'.

    (This is slightly different from when you open a file for
    writing with fopen(filename, "w") in C or file(filename, 'w')
    in Python.)


    --- Unicode

    If 'text' isn't Unicode, then apart from newline handling, the
    bytes are written verbatim to the file.  The 'encoding' and
    'errors' arguments are not used and must be omitted.

    If 'text' is Unicode, it is first converted to bytes using the
    specified 'encoding' (or the default encoding if 'encoding'
    isn't specified).  The 'errors' argument applies only to this
    conversion.

    """
    if isinstance(text, unicode):
        if linesep is not None:
            # Convert all standard end-of-line sequences to
            # ordinary newline characters, then to linesep.
            text = (text.replace(u'\r\n', u'\n')
                        .replace(u'\r\x85', u'\n')
                        .replace(u'\r', u'\n')
                        .replace(u'\x85', u'\n')
                        .replace(u'\u2028', u'\n'))
            text = text.replace(u'\n', linesep)
        if encoding is None:
            encoding = sys.getdefaultencoding()
        bytes = text.encode(encoding, errors)
    else:
        # It is an error to specify an encoding if 'text' is
        # an 8-bit string.
        assert encoding is None

        if linesep is not None:
            text = (text.replace('\r\n', '\n')
                        .replace('\r', '\n'))
            text = text.replace('\n', linesep)
        # With linesep=None the text is written exactly as given;
        # the output must be defined on this path as well.
        bytes = text

    self.write_bytes(bytes, append)
|
|
||||||
|
|
||||||
def lines(self, encoding=None, errors='strict', retain=True):
    r""" Open this file, read all lines, return them in a list.

    Optional arguments:
        encoding - The Unicode encoding (or character set) of
            the file.  The default is None, meaning the content
            of the file is read as 8-bit characters and returned
            as a list of (non-Unicode) str objects.
        errors - How to handle Unicode errors; see help(str.decode)
            for the options.  Default is 'strict'
        retain - If true, retain newline characters; but all newline
            character combinations ('\r', '\n', '\r\n') are
            translated to '\n'.  If false, newline characters are
            stripped off.  Default is True.

    With no encoding and retain true, the file is read directly
    with readlines(); otherwise the result is
    self.text(encoding, errors).splitlines(retain).
    """
    if encoding is not None or not retain:
        return self.text(encoding, errors).splitlines(retain)

    handle = self.open(_textmode)
    try:
        return handle.readlines()
    finally:
        handle.close()
|
|
||||||
|
|
||||||
def write_lines(self, lines, encoding=None, errors='strict',
                linesep=os.linesep, append=False):
    r""" Write the given lines of text to this file.

    By default this overwrites any existing file at this path.

    This puts a platform-specific newline sequence on every line.
    See 'linesep' below.

    lines - A list of strings.

    encoding - A Unicode encoding to use.  This applies only if
        'lines' contains any Unicode strings.

    errors - How to handle errors in Unicode encoding.  This
        also applies only to Unicode strings.

    linesep - The desired line-ending.  This line-ending is
        applied to every line.  If a line already has any
        standard line ending ('\r', '\n', '\r\n', u'\x85',
        u'\r\x85', u'\u2028'), that will be stripped off and
        this will be used instead.  The default is os.linesep,
        which is platform-dependent ('\r\n' on Windows, '\n' on
        Unix, etc.)  Specify None to write the lines as-is,
        like file.writelines().

    Use the keyword argument append=True to append lines to the
    file.  The default is to overwrite the file.  Warning:
    When you use this with Unicode data, if the encoding of the
    existing data in the file is different from the encoding
    you specify with the encoding= parameter, the result is
    mixed-encoding data, which can really confuse someone trying
    to read the file later.
    """
    mode = 'ab' if append else 'wb'
    out = self.open(mode)
    try:
        for line in lines:
            is_unicode = isinstance(line, unicode)
            if linesep is not None:
                # Pick the line endings to look for according to the
                # string type, strip at most one of them, then add
                # the requested linesep.
                if is_unicode:
                    two_char = (u'\r\n', u'\x0d\x85')
                    one_char = (u'\r', u'\n', u'\x85', u'\u2028')
                else:
                    two_char = ('\r\n',)
                    one_char = ('\r', '\n')
                if line[-2:] in two_char:
                    line = line[:-2]
                elif line[-1:] in one_char:
                    line = line[:-1]
                line += linesep
            if is_unicode:
                if encoding is None:
                    # Resolved once, then reused for later lines.
                    encoding = sys.getdefaultencoding()
                line = line.encode(encoding, errors)
            out.write(line)
    finally:
        out.close()
|
|
||||||
|
|
||||||
def read_md5(self):
    """ Calculate the md5 hash for this file.

    The file is opened in binary mode and consumed in 8192-byte
    chunks until exhausted; the raw digest (hashlib's digest(),
    not the hex form) is returned.
    """
    source = self.open('rb')
    try:
        digest = hashlib.md5()
        chunk = source.read(8192)
        while chunk:
            digest.update(chunk)
            chunk = source.read(8192)
    finally:
        source.close()
    return digest.digest()
|
|
||||||
|
|
||||||
# --- Methods for querying the filesystem.
|
|
||||||
|
|
||||||
# Predicates taken directly from os.path.
exists = os.path.exists
isdir = os.path.isdir
isfile = os.path.isfile
islink = os.path.islink
ismount = os.path.ismount

# samefile is only exposed when os.path provides it.
if hasattr(os.path, 'samefile'):
    samefile = os.path.samefile

# Timestamps and size: each os.path getter is also exposed as a
# read-only property.
getatime = os.path.getatime
atime = property(
    getatime,
    doc=""" Last access time of the file. """)

getmtime = os.path.getmtime
mtime = property(
    getmtime,
    doc=""" Last-modified time of the file. """)

if hasattr(os.path, 'getctime'):
    getctime = os.path.getctime
    ctime = property(
        getctime,
        doc=""" Creation time of the file. """)

getsize = os.path.getsize
size = property(
    getsize,
    doc=""" Size of the file, in bytes. """)
|
|
||||||
|
|
||||||
# Only defined when the os module offers access().
if hasattr(os, 'access'):
    def access(self, mode):
        """ Report whether the current user may access this path.

        mode - One of the constants os.F_OK, os.R_OK, os.W_OK, os.X_OK

        Returns the result of os.access(self, mode).
        """
        allowed = os.access(self, mode)
        return allowed
|
|
||||||
|
|
||||||
def stat(self):
    """ Return the result of os.stat() for this path. """
    info = os.stat(self)
    return info
|
|
||||||
|
|
||||||
def lstat(self):
    """ Return the result of os.lstat() for this path; unlike
    path.stat(), symbolic links are not followed.
    """
    info = os.lstat(self)
    return info
|
|
||||||
|
|
||||||
def get_owner(self):
    r""" Return the name of the owner of this file or directory.

    This follows symbolic links.

    On Windows (os.name == 'nt') the owner is looked up through
    win32security and returned in the form ur'DOMAIN\User Name';
    there, a group can own a file or directory.  Elsewhere the
    name comes from the pwd entry for the st_uid of self.stat().

    Raises Exception on Windows when win32security is unavailable
    and NotImplementedError elsewhere when pwd is unavailable.
    """
    if os.name != 'nt':
        if pwd is None:
            raise NotImplementedError("path.owner is not implemented on this platform.")
        return pwd.getpwuid(self.stat().st_uid).pw_name

    if win32security is None:
        raise Exception("path.owner requires win32all to be installed")
    descriptor = win32security.GetFileSecurity(
        self, win32security.OWNER_SECURITY_INFORMATION)
    owner_sid = descriptor.GetSecurityDescriptorOwner()
    account, domain, typecode = win32security.LookupAccountSid(None, owner_sid)
    return domain + u'\\' + account

owner = property(
    get_owner,
    doc=""" Name of the owner of this file or directory. """)
|
|
||||||
|
|
||||||
# Only defined when the os module offers statvfs().
if hasattr(os, 'statvfs'):
    def statvfs(self):
        """ Return the result of os.statvfs() for this path. """
        info = os.statvfs(self)
        return info
|
|
||||||
|
|
||||||
# Only defined when the os module offers pathconf().
if hasattr(os, 'pathconf'):
    def pathconf(self, name):
        """ Return os.pathconf(self, name) for this path. """
        value = os.pathconf(self, name)
        return value
|
|
||||||
|
|
||||||
|
|
||||||
# --- Modifying operations on files and directories
|
|
||||||
|
|
||||||
def utime(self, times):
    """ Set this file's access and modified times by calling
    os.utime(self, times).
    """
    os.utime(self, times)
|
|
||||||
|
|
||||||
def chmod(self, mode):
    """ Call os.chmod(self, mode) on this path. """
    os.chmod(self, mode)
|
|
||||||
|
|
||||||
# Only defined when the os module offers chown().
if hasattr(os, 'chown'):
    def chown(self, uid, gid):
        """ Call os.chown(self, uid, gid) on this path. """
        os.chown(self, uid, gid)
|
|
||||||
|
|
||||||
def rename(self, new):
    """ Call os.rename(self, new). """
    os.rename(self, new)
|
|
||||||
|
|
||||||
def renames(self, new):
    """ Call os.renames(self, new). """
    os.renames(self, new)
|
|
||||||
|
|
||||||
|
|
||||||
# --- Create/delete operations on directories
|
|
||||||
|
|
||||||
def mkdir(self, mode=0777):
|
|
||||||
os.mkdir(self, mode)
|
|
||||||
|
|
||||||
def makedirs(self, mode=0777):
|
|
||||||
os.makedirs(self, mode)
|
|
||||||
|
|
||||||
def rmdir(self):
    """ Call os.rmdir(self). """
    os.rmdir(self)
|
|
||||||
|
|
||||||
def removedirs(self):
    """ Call os.removedirs(self). """
    os.removedirs(self)
|
|
||||||
|
|
||||||
|
|
||||||
# --- Modifying operations on files
|
|
||||||
|
|
||||||
def touch(self):
|
|
||||||
""" Set the access/modified times of this file to the current time.
|
|
||||||
Create the file if it does not exist.
|
|
||||||
"""
|
|
||||||
fd = os.open(self, os.O_WRONLY | os.O_CREAT, 0666)
|
|
||||||
os.close(fd)
|
|
||||||
os.utime(self, None)
|
|
||||||
|
|
||||||
def remove(self):
    """ Call os.remove(self). """
    os.remove(self)
|
|
||||||
|
|
||||||
def unlink(self):
    """ Call os.unlink(self). """
    os.unlink(self)
|
|
||||||
|
|
||||||
|
|
||||||
# --- Links
|
|
||||||
|
|
||||||
# Only defined when the os module offers link().
if hasattr(os, 'link'):
    def link(self, newpath):
        """ Make 'newpath' a hard link to this file, via
        os.link(self, newpath).
        """
        os.link(self, newpath)
|
|
||||||
|
|
||||||
# Only defined when the os module offers symlink().
if hasattr(os, 'symlink'):
    def symlink(self, newlink):
        """ Make 'newlink' a symbolic link to this path, via
        os.symlink(self, newlink).
        """
        os.symlink(self, newlink)
|
|
||||||
|
|
||||||
# Both helpers are only defined when the os module offers readlink().
if hasattr(os, 'readlink'):
    def readlink(self):
        """ Return the path to which this symbolic link points.

        The value of os.readlink() is wrapped in this object's own
        class; it may be an absolute or a relative path.
        """
        target = os.readlink(self)
        return self.__class__(target)

    def readlinkabs(self):
        """ Return the path to which this symbolic link points.

        The result is always an absolute path: a relative target
        is joined onto self.parent and passed through abspath().
        """
        target = self.readlink()
        if not target.isabs():
            return (self.parent / target).abspath()
        return target
|
|
||||||
|
|
||||||
|
|
||||||
# --- High-level functions from shutil
|
|
||||||
|
|
||||||
# Copy helpers taken directly from shutil.
copyfile = shutil.copyfile
copymode = shutil.copymode
copystat = shutil.copystat
copy = shutil.copy
copy2 = shutil.copy2
copytree = shutil.copytree
# move is only exposed when shutil provides it.
if hasattr(shutil, 'move'):
    move = shutil.move
rmtree = shutil.rmtree
|
|
||||||
|
|
||||||
|
|
||||||
# --- Special stuff from os
|
|
||||||
|
|
||||||
# Only defined when the os module offers chroot().
if hasattr(os, 'chroot'):
    def chroot(self):
        """ Call os.chroot(self). """
        os.chroot(self)
|
|
||||||
|
|
||||||
# Only defined when the os module offers startfile().
if hasattr(os, 'startfile'):
    def startfile(self):
        """ Call os.startfile(self). """
        os.startfile(self)
|
|
||||||
|
|
@ -1,121 +0,0 @@
|
|||||||
|
|
||||||
import sys, glob, re
|
|
||||||
|
|
||||||
import mechanize
|
|
||||||
|
|
||||||
URL = 'http://translate.google.com/translate_t?text=%(text)s&langpair=en|%(lang)s&oe=UTF8'
|
|
||||||
|
|
||||||
def browser():
    """ Create and return a configured mechanize.Browser.

    Refresh handling is switched on, robots handling is switched
    off, and the headers are replaced by a single Firefox-style
    User-agent header.
    """
    user_agent = 'Mozilla/5.0 (X11; U; i686 Linux; en_US; rv:1.8.0.4) Gecko/20060508 Firefox/1.5.0.4'
    br = mechanize.Browser()
    br.set_handle_refresh(True)
    br.set_handle_robots(False)
    br.addheaders = [('User-agent', user_agent)]
    return br
|
|
||||||
|
|
||||||
|
|
||||||
class PoFile(object):
    """ In-memory representation of a .po translation file.

    The file is parsed on construction.  self.entries holds, in
    file order, either Entry objects (one per msgid/msgstr pair) or
    the stripped text of any other line (comments, blank lines).
    str(pofile) joins the entries back together with newlines.
    """

    # Matches '&', anything that looks like a <tag>, and '%'.
    SANITIZE = re.compile(r'&|<[^<>]+>|\%')
    # Captures the text between the first and the last double quote.
    STRING = re.compile(r'"(.*)"')

    def __init__(self, po_file):
        """ Open 'po_file' (a file name) in 'r+b' mode and parse it. """
        self.po_file = open(po_file, 'r+b')
        self.browser = browser()
        self.entries = []
        self.read()

    def sanitize_line(self, line):
        """ Return 'line' with everything matched by SANITIZE removed. """
        # Pattern.sub() needs the replacement as its first argument.
        # NOTE(review): the empty replacement is an assumption; the
        # original call passed no replacement at all and could not run.
        return self.SANITIZE.sub('', line)

    def read(self):
        """ Parse self.po_file from the start, appending to self.entries. """
        self.po_file.seek(0)

        # Parser states: inside a msgid, a msgstr, or a '#:' block.
        ID = 0
        STR = 1
        WHR = 2

        mode = None
        where, msgid, msgstr, fuzzy = [], [], [], False

        for line in self.po_file.readlines():
            prev_mode = mode
            if line.startswith('#:'):
                mode = WHR
            elif line.startswith('msgid'):
                mode = ID
            elif line.startswith('msgstr'):
                mode = STR
            elif line.startswith('#,'):
                # Flag line: remember it, but it is not stored itself.
                fuzzy = True
                continue
            elif line.startswith('#') or not line.strip():
                mode = None
            # Any other line (a quoted continuation) keeps the mode.

            if mode != prev_mode:
                if prev_mode == STR:
                    # Leaving a msgstr completes the current entry.
                    self.add_entry(where, fuzzy, msgid, msgstr)
                    where, msgid, msgstr, fuzzy = [], [], [], False

            if mode == WHR:
                where.append(line[2:].strip())
            elif mode == ID:
                msgid.append(self.get_string(line))
            elif mode == STR:
                msgstr.append(self.get_string(line))
            elif mode is None:
                self.add_line(line)

        if mode == STR:
            # The file ended inside a msgstr: flush the last entry
            # instead of silently dropping it.
            self.add_entry(where, fuzzy, msgid, msgstr)

    def get_string(self, line):
        """ Return the double-quoted text found in 'line'. """
        return self.STRING.search(line).group(1)

    def add_line(self, line):
        """ Store a non-entry line, stripped of surrounding whitespace. """
        self.entries.append(line.strip())

    def add_entry(self, where, fuzzy, msgid, msgstr):
        """ Store a new Entry built from the collected pieces. """
        self.entries.append(Entry(where, fuzzy, msgid, msgstr))

    def __str__(self):
        return '\n'.join([str(i) for i in self.entries]) + '\n'
|
|
||||||
|
|
||||||
|
|
||||||
class Entry(object):
    """ One translation entry: source locations, fuzzy flag, and
    the msgid and msgstr line lists.

    The strings handed to the constructor are decoded with
    'encoding'; str(entry) encodes them again with the same
    encoding while rendering the entry in .po syntax.
    """

    def __init__(self, where, fuzzy, msgid, msgstr, encoding='utf-8'):
        self.fuzzy = fuzzy
        self.where = [item.decode(encoding) for item in where]
        self.msgid = [item.decode(encoding) for item in msgid]
        self.msgstr = [item.decode(encoding) for item in msgstr]
        self.encoding = encoding

    def __str__(self):
        enc = self.encoding
        out = ['#: ' + location.encode(enc) for location in self.where]
        if self.fuzzy:
            out.append('#, fuzzy')
        # Only the first line of each block carries its keyword; the
        # remaining lines are bare quoted continuations.
        for keyword, chunks in (('msgid ', self.msgid),
                                ('msgstr ', self.msgstr)):
            for index, chunk in enumerate(chunks):
                prefix = keyword if index == 0 else ''
                out.append(prefix + '"%s"' % chunk.encode(enc))
        return '\n'.join(out)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def main():
    """ Build a PoFile for every '*.po' file matched by glob in
    the current directory.  The parsed objects are discarded.
    """
    for po_path in glob.glob('*.po'):
        PoFile(po_path)
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    # Round-trip de.po through the parser and write the result to
    # /tmp/de.po.
    pof = PoFile('de.po')
    out = open('/tmp/de.po', 'wb')
    try:
        out.write(str(pof))
    finally:
        # Close explicitly so the data is flushed and the handle released.
        out.close()
    #sys.exit(main())
|
|
@ -15,7 +15,10 @@ def available_translations():
|
|||||||
global _available_translations
|
global _available_translations
|
||||||
if _available_translations is None:
|
if _available_translations is None:
|
||||||
stats = P('localization/stats.pickle')
|
stats = P('localization/stats.pickle')
|
||||||
stats = cPickle.load(open(stats, 'rb'))
|
if os.path.exists(stats):
|
||||||
|
stats = cPickle.load(open(stats, 'rb'))
|
||||||
|
else:
|
||||||
|
stats = {}
|
||||||
_available_translations = [x for x in stats if stats[x] > 0.1]
|
_available_translations = [x for x in stats if stats[x] > 0.1]
|
||||||
return _available_translations
|
return _available_translations
|
||||||
|
|
||||||
|
@ -85,7 +85,7 @@ __all__ = [
|
|||||||
'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno',
|
'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno',
|
||||||
'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
|
'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
|
||||||
'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
|
'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
|
||||||
'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
|
'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
|
||||||
'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
|
'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
|
||||||
'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
|
'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
|
||||||
'indentedBlock', 'originalTextFor',
|
'indentedBlock', 'originalTextFor',
|
||||||
@ -425,7 +425,7 @@ class ParseResults(object):
|
|||||||
self[k] = v
|
self[k] = v
|
||||||
if isinstance(v[0],ParseResults):
|
if isinstance(v[0],ParseResults):
|
||||||
v[0].__parent = wkref(self)
|
v[0].__parent = wkref(self)
|
||||||
|
|
||||||
self.__toklist += other.__toklist
|
self.__toklist += other.__toklist
|
||||||
self.__accumNames.update( other.__accumNames )
|
self.__accumNames.update( other.__accumNames )
|
||||||
del other
|
del other
|
||||||
@ -3231,12 +3231,12 @@ def originalTextFor(expr, asString=True):
|
|||||||
restore the parsed fields of an HTML start tag into the raw tag text itself, or to
|
restore the parsed fields of an HTML start tag into the raw tag text itself, or to
|
||||||
revert separate tokens with intervening whitespace back to the original matching
|
revert separate tokens with intervening whitespace back to the original matching
|
||||||
input text. Simpler to use than the parse action keepOriginalText, and does not
|
input text. Simpler to use than the parse action keepOriginalText, and does not
|
||||||
require the inspect module to chase up the call stack. By default, returns a
|
require the inspect module to chase up the call stack. By default, returns a
|
||||||
string containing the original parsed text.
|
string containing the original parsed text.
|
||||||
|
|
||||||
If the optional asString argument is passed as False, then the return value is a
|
If the optional asString argument is passed as False, then the return value is a
|
||||||
ParseResults containing any results names that were originally matched, and a
|
ParseResults containing any results names that were originally matched, and a
|
||||||
single token containing the original matched text from the input string. So if
|
single token containing the original matched text from the input string. So if
|
||||||
the expression passed to originalTextFor contains expressions with defined
|
the expression passed to originalTextFor contains expressions with defined
|
||||||
results names, you must set asString to False if you want to preserve those
|
results names, you must set asString to False if you want to preserve those
|
||||||
results name values."""
|
results name values."""
|
||||||
@ -3252,7 +3252,7 @@ def originalTextFor(expr, asString=True):
|
|||||||
del t["_original_end"]
|
del t["_original_end"]
|
||||||
matchExpr.setParseAction(extractText)
|
matchExpr.setParseAction(extractText)
|
||||||
return matchExpr
|
return matchExpr
|
||||||
|
|
||||||
# convenience constants for positional expressions
|
# convenience constants for positional expressions
|
||||||
empty = Empty().setName("empty")
|
empty = Empty().setName("empty")
|
||||||
lineStart = LineStart().setName("lineStart")
|
lineStart = LineStart().setName("lineStart")
|
||||||
@ -3532,7 +3532,7 @@ def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString):
|
|||||||
).setParseAction(lambda t:t[0].strip()))
|
).setParseAction(lambda t:t[0].strip()))
|
||||||
else:
|
else:
|
||||||
if ignoreExpr is not None:
|
if ignoreExpr is not None:
|
||||||
content = (Combine(OneOrMore(~ignoreExpr +
|
content = (Combine(OneOrMore(~ignoreExpr +
|
||||||
~Literal(opener) + ~Literal(closer) +
|
~Literal(opener) + ~Literal(closer) +
|
||||||
CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
|
CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
|
||||||
).setParseAction(lambda t:t[0].strip()))
|
).setParseAction(lambda t:t[0].strip()))
|
||||||
|
@ -20,6 +20,7 @@ class WriteXmlMixin:
|
|||||||
def to_xml(self, encoding = "iso-8859-1"):
|
def to_xml(self, encoding = "iso-8859-1"):
|
||||||
try:
|
try:
|
||||||
import cStringIO as StringIO
|
import cStringIO as StringIO
|
||||||
|
StringIO
|
||||||
except ImportError:
|
except ImportError:
|
||||||
import StringIO
|
import StringIO
|
||||||
f = StringIO.StringIO()
|
f = StringIO.StringIO()
|
||||||
@ -64,7 +65,7 @@ def _format_date(dt):
|
|||||||
"Jul", "Aug", "Sep", "Oct", "Nov", "Dec"][dt.month-1],
|
"Jul", "Aug", "Sep", "Oct", "Nov", "Dec"][dt.month-1],
|
||||||
dt.year, dt.hour, dt.minute, dt.second)
|
dt.year, dt.hour, dt.minute, dt.second)
|
||||||
|
|
||||||
|
|
||||||
##
|
##
|
||||||
# A couple simple wrapper objects for the fields which
|
# A couple simple wrapper objects for the fields which
|
||||||
# take a simple value other than a string.
|
# take a simple value other than a string.
|
||||||
@ -72,7 +73,7 @@ class IntElement:
|
|||||||
"""implements the 'publish' API for integers
|
"""implements the 'publish' API for integers
|
||||||
|
|
||||||
Takes the tag name and the integer value to publish.
|
Takes the tag name and the integer value to publish.
|
||||||
|
|
||||||
(Could be used for anything which uses str() to be published
|
(Could be used for anything which uses str() to be published
|
||||||
to text for XML.)
|
to text for XML.)
|
||||||
"""
|
"""
|
||||||
@ -138,7 +139,7 @@ class Image:
|
|||||||
self.width = width
|
self.width = width
|
||||||
self.height = height
|
self.height = height
|
||||||
self.description = description
|
self.description = description
|
||||||
|
|
||||||
def publish(self, handler):
|
def publish(self, handler):
|
||||||
handler.startElement("image", self.element_attrs)
|
handler.startElement("image", self.element_attrs)
|
||||||
|
|
||||||
@ -150,7 +151,7 @@ class Image:
|
|||||||
if isinstance(width, int):
|
if isinstance(width, int):
|
||||||
width = IntElement("width", width)
|
width = IntElement("width", width)
|
||||||
_opt_element(handler, "width", width)
|
_opt_element(handler, "width", width)
|
||||||
|
|
||||||
height = self.height
|
height = self.height
|
||||||
if isinstance(height, int):
|
if isinstance(height, int):
|
||||||
height = IntElement("height", height)
|
height = IntElement("height", height)
|
||||||
@ -196,7 +197,7 @@ class TextInput:
|
|||||||
_element(handler, "name", self.name)
|
_element(handler, "name", self.name)
|
||||||
_element(handler, "link", self.link)
|
_element(handler, "link", self.link)
|
||||||
handler.endElement("textInput")
|
handler.endElement("textInput")
|
||||||
|
|
||||||
|
|
||||||
class Enclosure:
|
class Enclosure:
|
||||||
"""Publish an enclosure"""
|
"""Publish an enclosure"""
|
||||||
@ -255,7 +256,7 @@ class RSS2(WriteXmlMixin):
|
|||||||
Stores the channel attributes, with the "category" elements under
|
Stores the channel attributes, with the "category" elements under
|
||||||
".categories" and the RSS items under ".items".
|
".categories" and the RSS items under ".items".
|
||||||
"""
|
"""
|
||||||
|
|
||||||
rss_attrs = {"version": "2.0"}
|
rss_attrs = {"version": "2.0"}
|
||||||
element_attrs = {}
|
element_attrs = {}
|
||||||
def __init__(self,
|
def __init__(self,
|
||||||
@ -269,7 +270,7 @@ class RSS2(WriteXmlMixin):
|
|||||||
webMaster = None,
|
webMaster = None,
|
||||||
pubDate = None, # a datetime, *in* *GMT*
|
pubDate = None, # a datetime, *in* *GMT*
|
||||||
lastBuildDate = None, # a datetime
|
lastBuildDate = None, # a datetime
|
||||||
|
|
||||||
categories = None, # list of strings or Category
|
categories = None, # list of strings or Category
|
||||||
generator = _generator_name,
|
generator = _generator_name,
|
||||||
docs = "http://blogs.law.harvard.edu/tech/rss",
|
docs = "http://blogs.law.harvard.edu/tech/rss",
|
||||||
@ -294,7 +295,7 @@ class RSS2(WriteXmlMixin):
|
|||||||
self.webMaster = webMaster
|
self.webMaster = webMaster
|
||||||
self.pubDate = pubDate
|
self.pubDate = pubDate
|
||||||
self.lastBuildDate = lastBuildDate
|
self.lastBuildDate = lastBuildDate
|
||||||
|
|
||||||
if categories is None:
|
if categories is None:
|
||||||
categories = []
|
categories = []
|
||||||
self.categories = categories
|
self.categories = categories
|
||||||
@ -320,7 +321,7 @@ class RSS2(WriteXmlMixin):
|
|||||||
_element(handler, "description", self.description)
|
_element(handler, "description", self.description)
|
||||||
|
|
||||||
self.publish_extensions(handler)
|
self.publish_extensions(handler)
|
||||||
|
|
||||||
_opt_element(handler, "language", self.language)
|
_opt_element(handler, "language", self.language)
|
||||||
_opt_element(handler, "copyright", self.copyright)
|
_opt_element(handler, "copyright", self.copyright)
|
||||||
_opt_element(handler, "managingEditor", self.managingEditor)
|
_opt_element(handler, "managingEditor", self.managingEditor)
|
||||||
@ -374,8 +375,8 @@ class RSS2(WriteXmlMixin):
|
|||||||
# output after the three required fields.
|
# output after the three required fields.
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class RSSItem(WriteXmlMixin):
|
class RSSItem(WriteXmlMixin):
|
||||||
"""Publish an RSS Item"""
|
"""Publish an RSS Item"""
|
||||||
element_attrs = {}
|
element_attrs = {}
|
||||||
@ -391,7 +392,7 @@ class RSSItem(WriteXmlMixin):
|
|||||||
pubDate = None, # a datetime
|
pubDate = None, # a datetime
|
||||||
source = None, # a Source
|
source = None, # a Source
|
||||||
):
|
):
|
||||||
|
|
||||||
if title is None and description is None:
|
if title is None and description is None:
|
||||||
raise TypeError(
|
raise TypeError(
|
||||||
"must define at least one of 'title' or 'description'")
|
"must define at least one of 'title' or 'description'")
|
||||||
@ -421,7 +422,7 @@ class RSSItem(WriteXmlMixin):
|
|||||||
if isinstance(category, basestring):
|
if isinstance(category, basestring):
|
||||||
category = Category(category)
|
category = Category(category)
|
||||||
category.publish(handler)
|
category.publish(handler)
|
||||||
|
|
||||||
_opt_element(handler, "comments", self.comments)
|
_opt_element(handler, "comments", self.comments)
|
||||||
if self.enclosure is not None:
|
if self.enclosure is not None:
|
||||||
self.enclosure.publish(handler)
|
self.enclosure.publish(handler)
|
||||||
@ -434,7 +435,7 @@ class RSSItem(WriteXmlMixin):
|
|||||||
|
|
||||||
if self.source is not None:
|
if self.source is not None:
|
||||||
self.source.publish(handler)
|
self.source.publish(handler)
|
||||||
|
|
||||||
handler.endElement("item")
|
handler.endElement("item")
|
||||||
|
|
||||||
def publish_extensions(self, handler):
|
def publish_extensions(self, handler):
|
||||||
|
@ -57,13 +57,13 @@ recipe_modules = ['recipe_' + r for r in (
|
|||||||
'monitor', 'republika', 'beta', 'beta_en', 'glasjavnosti',
|
'monitor', 'republika', 'beta', 'beta_en', 'glasjavnosti',
|
||||||
'esquire', 'livemint', 'thedgesingapore', 'darknet', 'rga',
|
'esquire', 'livemint', 'thedgesingapore', 'darknet', 'rga',
|
||||||
'intelligencer', 'theoldfoodie', 'hln_be', 'honvedelem',
|
'intelligencer', 'theoldfoodie', 'hln_be', 'honvedelem',
|
||||||
|
'the_new_republic',
|
||||||
)]
|
)]
|
||||||
|
|
||||||
|
|
||||||
import re, imp, inspect, time, os
|
import re, imp, inspect, time, os
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe, CustomIndexRecipe, AutomaticNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe, CustomIndexRecipe, AutomaticNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||||
from calibre.path import path
|
|
||||||
from calibre.ptempfile import PersistentTemporaryDirectory
|
from calibre.ptempfile import PersistentTemporaryDirectory
|
||||||
from calibre import __appname__, english_sort
|
from calibre import __appname__, english_sort
|
||||||
|
|
||||||
@ -102,8 +102,8 @@ def compile_recipe(src):
|
|||||||
'''
|
'''
|
||||||
global _tdir, _crep
|
global _tdir, _crep
|
||||||
if _tdir is None or not os.path.exists(_tdir):
|
if _tdir is None or not os.path.exists(_tdir):
|
||||||
_tdir = path(PersistentTemporaryDirectory('_recipes'))
|
_tdir = PersistentTemporaryDirectory('_recipes')
|
||||||
temp = _tdir/('recipe%d.py'%_crep)
|
temp = os.path.join(_tdir, 'recipe%d.py'%_crep)
|
||||||
_crep += 1
|
_crep += 1
|
||||||
if not isinstance(src, unicode):
|
if not isinstance(src, unicode):
|
||||||
match = re.search(r'coding[:=]\s*([-\w.]+)', src[:200])
|
match = re.search(r'coding[:=]\s*([-\w.]+)', src[:200])
|
||||||
@ -118,8 +118,9 @@ def compile_recipe(src):
|
|||||||
src = src.replace('from libprs500', 'from calibre').encode('utf-8')
|
src = src.replace('from libprs500', 'from calibre').encode('utf-8')
|
||||||
f.write(src)
|
f.write(src)
|
||||||
f.close()
|
f.close()
|
||||||
module = imp.find_module(temp.namebase, [temp.dirname()])
|
module = imp.find_module(os.path.splitext(os.path.basename(temp))[0],
|
||||||
module = imp.load_module(temp.namebase, *module)
|
[os.path.dirname(temp)])
|
||||||
|
module = imp.load_module(os.path.splitext(os.path.basename(temp))[0], *module)
|
||||||
classes = inspect.getmembers(module,
|
classes = inspect.getmembers(module,
|
||||||
lambda x : inspect.isclass(x) and \
|
lambda x : inspect.isclass(x) and \
|
||||||
issubclass(x, (BasicNewsRecipe,)) and \
|
issubclass(x, (BasicNewsRecipe,)) and \
|
||||||
@ -148,6 +149,7 @@ _titles.sort(cmp=english_sort)
|
|||||||
titles = _titles
|
titles = _titles
|
||||||
|
|
||||||
def migrate_automatic_profile_to_automatic_recipe(profile):
|
def migrate_automatic_profile_to_automatic_recipe(profile):
|
||||||
|
BeautifulSoup
|
||||||
oprofile = profile
|
oprofile = profile
|
||||||
profile = compile_recipe(profile)
|
profile = compile_recipe(profile)
|
||||||
if 'BasicUserProfile' not in profile.__name__:
|
if 'BasicUserProfile' not in profile.__name__:
|
||||||
@ -165,3 +167,4 @@ class BasicUserRecipe%d(AutomaticNewsRecipe):
|
|||||||
'''%(int(time.time()), repr(profile.title), profile.oldest_article,
|
'''%(int(time.time()), repr(profile.title), profile.oldest_article,
|
||||||
profile.max_articles_per_feed, profile.summary_length, repr(profile.feeds))
|
profile.max_articles_per_feed, profile.summary_length, repr(profile.feeds))
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,61 +1,61 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
|
||||||
'''
|
'''
|
||||||
24sata.hr
|
24sata.hr
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import re
|
import re
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
from calibre.ebooks.BeautifulSoup import Tag
|
||||||
|
|
||||||
class Cro24Sata(BasicNewsRecipe):
|
class Cro24Sata(BasicNewsRecipe):
|
||||||
title = '24 Sata - Hr'
|
title = '24 Sata - Hr'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = "News Portal from Croatia"
|
description = "News Portal from Croatia"
|
||||||
publisher = '24sata.hr'
|
publisher = '24sata.hr'
|
||||||
category = 'news, politics, Croatia'
|
category = 'news, politics, Croatia'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
delay = 4
|
delay = 4
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
language = 'hr'
|
language = 'hr'
|
||||||
|
|
||||||
lang = 'hr-HR'
|
lang = 'hr-HR'
|
||||||
|
|
||||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
|
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comment' : description
|
'comment' : description
|
||||||
, 'tags' : category
|
, 'tags' : category
|
||||||
, 'publisher' : publisher
|
, 'publisher' : publisher
|
||||||
, 'language' : lang
|
, 'language' : lang
|
||||||
, 'pretty_print' : True
|
, 'pretty_print' : True
|
||||||
}
|
}
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name=['object','link','embed'])
|
dict(name=['object','link','embed'])
|
||||||
,dict(name='table', attrs={'class':'enumbox'})
|
,dict(name='table', attrs={'class':'enumbox'})
|
||||||
]
|
]
|
||||||
|
|
||||||
feeds = [(u'Najnovije Vijesti', u'http://www.24sata.hr/index.php?cmd=show_rss&action=novo')]
|
feeds = [(u'Najnovije Vijesti', u'http://www.24sata.hr/index.php?cmd=show_rss&action=novo')]
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
soup.html['lang'] = self.lang
|
soup.html['lang'] = self.lang
|
||||||
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
||||||
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
|
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
|
||||||
soup.head.insert(0,mlang)
|
soup.head.insert(0,mlang)
|
||||||
soup.head.insert(1,mcharset)
|
soup.head.insert(1,mcharset)
|
||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
del item['style']
|
del item['style']
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
return url + '&action=ispis'
|
return url + '&action=ispis'
|
||||||
|
|
||||||
|
@ -1,68 +1,68 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
|
||||||
'''
|
'''
|
||||||
24sata.rs
|
24sata.rs
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import re
|
import re
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
from calibre.ebooks.BeautifulSoup import Tag
|
||||||
|
|
||||||
class Ser24Sata(BasicNewsRecipe):
|
class Ser24Sata(BasicNewsRecipe):
|
||||||
title = '24 Sata - Sr'
|
title = '24 Sata - Sr'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = '24 sata portal vesti iz Srbije'
|
description = '24 sata portal vesti iz Srbije'
|
||||||
publisher = 'Ringier d.o.o.'
|
publisher = 'Ringier d.o.o.'
|
||||||
category = 'news, politics, entertainment, Serbia'
|
category = 'news, politics, entertainment, Serbia'
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
language = 'sr'
|
language = 'sr'
|
||||||
|
|
||||||
lang = 'sr-Latn-RS'
|
lang = 'sr-Latn-RS'
|
||||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
|
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comment' : description
|
'comment' : description
|
||||||
, 'tags' : category
|
, 'tags' : category
|
||||||
, 'publisher' : publisher
|
, 'publisher' : publisher
|
||||||
, 'language' : lang
|
, 'language' : lang
|
||||||
, 'pretty_print' : True
|
, 'pretty_print' : True
|
||||||
}
|
}
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
|
|
||||||
feeds = [(u'Vesti Dana', u'http://www.24sata.rs/rss.php')]
|
feeds = [(u'Vesti Dana', u'http://www.24sata.rs/rss.php')]
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
soup.html['xml:lang'] = self.lang
|
soup.html['xml:lang'] = self.lang
|
||||||
soup.html['lang'] = self.lang
|
soup.html['lang'] = self.lang
|
||||||
|
|
||||||
attribs = [ 'style','font','valign'
|
attribs = [ 'style','font','valign'
|
||||||
,'colspan','width','height'
|
,'colspan','width','height'
|
||||||
,'rowspan','summary','align'
|
,'rowspan','summary','align'
|
||||||
,'cellspacing','cellpadding'
|
,'cellspacing','cellpadding'
|
||||||
,'frames','rules','border'
|
,'frames','rules','border'
|
||||||
]
|
]
|
||||||
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
|
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
|
||||||
item.name = 'div'
|
item.name = 'div'
|
||||||
for attrib in attribs:
|
for attrib in attribs:
|
||||||
if item.has_key(attrib):
|
if item.has_key(attrib):
|
||||||
del item[attrib]
|
del item[attrib]
|
||||||
|
|
||||||
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
||||||
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
|
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
|
||||||
soup.head.insert(0,mlang)
|
soup.head.insert(0,mlang)
|
||||||
soup.head.insert(1,mcharset)
|
soup.head.insert(1,mcharset)
|
||||||
return self.adeify_images(soup)
|
return self.adeify_images(soup)
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
article = url.partition('#')[0]
|
article = url.partition('#')[0]
|
||||||
article_id = article.partition('id=')[2]
|
article_id = article.partition('id=')[2]
|
||||||
return 'http://www.24sata.rs/_print.php?id=' + article_id
|
return 'http://www.24sata.rs/_print.php?id=' + article_id
|
||||||
|
|
||||||
|
@ -1,72 +1,72 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
elargentino.com
|
elargentino.com
|
||||||
'''
|
'''
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import Tag
|
from calibre.ebooks.BeautifulSoup import Tag
|
||||||
|
|
||||||
class SieteDias(BasicNewsRecipe):
|
class SieteDias(BasicNewsRecipe):
|
||||||
title = '7 dias'
|
title = '7 dias'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Revista Argentina'
|
description = 'Revista Argentina'
|
||||||
publisher = 'ElArgentino.com'
|
publisher = 'ElArgentino.com'
|
||||||
category = 'news, politics, show, Argentina'
|
category = 'news, politics, show, Argentina'
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
language = 'es'
|
language = 'es'
|
||||||
|
|
||||||
lang = 'es-AR'
|
lang = 'es-AR'
|
||||||
direction = 'ltr'
|
direction = 'ltr'
|
||||||
INDEX = 'http://www.elargentino.com/medios/125/7-Dias.html'
|
INDEX = 'http://www.elargentino.com/medios/125/7-Dias.html'
|
||||||
extra_css = ' .titulo{font-size: x-large; font-weight: bold} .volantaImp{font-size: small; font-weight: bold} '
|
extra_css = ' .titulo{font-size: x-large; font-weight: bold} .volantaImp{font-size: small; font-weight: bold} '
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment' , description
|
'--comment' , description
|
||||||
, '--category' , category
|
, '--category' , category
|
||||||
, '--publisher', publisher
|
, '--publisher', publisher
|
||||||
]
|
]
|
||||||
|
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} "'
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} "'
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'class':'ContainerPop'})]
|
keep_only_tags = [dict(name='div', attrs={'class':'ContainerPop'})]
|
||||||
|
|
||||||
remove_tags = [dict(name='link')]
|
remove_tags = [dict(name='link')]
|
||||||
|
|
||||||
feeds = [(u'Articulos', u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=125&Content-Type=text/xml&ChannelDesc=7%20D%C3%ADas')]
|
feeds = [(u'Articulos', u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=125&Content-Type=text/xml&ChannelDesc=7%20D%C3%ADas')]
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
main, sep, article_part = url.partition('/nota-')
|
main, sep, article_part = url.partition('/nota-')
|
||||||
article_id, rsep, rrest = article_part.partition('-')
|
article_id, rsep, rrest = article_part.partition('-')
|
||||||
return u'http://www.elargentino.com/Impresion.aspx?Id=' + article_id
|
return u'http://www.elargentino.com/Impresion.aspx?Id=' + article_id
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
del item['style']
|
del item['style']
|
||||||
soup.html['lang'] = self.lang
|
soup.html['lang'] = self.lang
|
||||||
soup.html['dir' ] = self.direction
|
soup.html['dir' ] = self.direction
|
||||||
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
||||||
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
|
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
|
||||||
soup.head.insert(0,mlang)
|
soup.head.insert(0,mlang)
|
||||||
soup.head.insert(1,mcharset)
|
soup.head.insert(1,mcharset)
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
def get_cover_url(self):
|
def get_cover_url(self):
|
||||||
cover_url = None
|
cover_url = None
|
||||||
soup = self.index_to_soup(self.INDEX)
|
soup = self.index_to_soup(self.INDEX)
|
||||||
cover_item = soup.find('div',attrs={'class':'colder'})
|
cover_item = soup.find('div',attrs={'class':'colder'})
|
||||||
if cover_item:
|
if cover_item:
|
||||||
clean_url = self.image_url_processor(None,cover_item.div.img['src'])
|
clean_url = self.image_url_processor(None,cover_item.div.img['src'])
|
||||||
cover_url = 'http://www.elargentino.com' + clean_url + '&height=600'
|
cover_url = 'http://www.elargentino.com' + clean_url + '&height=600'
|
||||||
return cover_url
|
return cover_url
|
||||||
|
|
||||||
def image_url_processor(self, baseurl, url):
|
def image_url_processor(self, baseurl, url):
|
||||||
base, sep, rest = url.rpartition('?Id=')
|
base, sep, rest = url.rpartition('?Id=')
|
||||||
img, sep2, rrest = rest.partition('&')
|
img, sep2, rrest = rest.partition('&')
|
||||||
return base + sep + img
|
return base + sep + img
|
||||||
|
@ -1,59 +1,59 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
www.accountancyage.com
|
www.accountancyage.com
|
||||||
'''
|
'''
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import Tag
|
from calibre.ebooks.BeautifulSoup import Tag
|
||||||
|
|
||||||
class AccountancyAge(BasicNewsRecipe):
|
class AccountancyAge(BasicNewsRecipe):
|
||||||
title = 'Accountancy Age'
|
title = 'Accountancy Age'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'business news'
|
description = 'business news'
|
||||||
publisher = 'accountancyage.com'
|
publisher = 'accountancyage.com'
|
||||||
category = 'news, politics, finances'
|
category = 'news, politics, finances'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
simultaneous_downloads = 1
|
simultaneous_downloads = 1
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
lang = 'en'
|
lang = 'en'
|
||||||
language = 'en'
|
language = 'en'
|
||||||
|
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment', description
|
'--comment', description
|
||||||
, '--category', category
|
, '--category', category
|
||||||
, '--publisher', publisher
|
, '--publisher', publisher
|
||||||
]
|
]
|
||||||
|
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'class':'bodycol'})]
|
keep_only_tags = [dict(name='div', attrs={'class':'bodycol'})]
|
||||||
remove_tags = [dict(name=['embed','object'])]
|
remove_tags = [dict(name=['embed','object'])]
|
||||||
remove_tags_after = dict(name='div', attrs={'id':'permalink'})
|
remove_tags_after = dict(name='div', attrs={'id':'permalink'})
|
||||||
remove_tags_before = dict(name='div', attrs={'class':'gap6'})
|
remove_tags_before = dict(name='div', attrs={'class':'gap6'})
|
||||||
|
|
||||||
feeds = [(u'All News', u'http://feeds.accountancyage.com/rss/latest/accountancyage/all')]
|
feeds = [(u'All News', u'http://feeds.accountancyage.com/rss/latest/accountancyage/all')]
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
rest, sep, miss = url.rpartition('/')
|
rest, sep, miss = url.rpartition('/')
|
||||||
rr, ssep, artid = rest.rpartition('/')
|
rr, ssep, artid = rest.rpartition('/')
|
||||||
return u'http://www.accountancyage.com/articles/print/' + artid
|
return u'http://www.accountancyage.com/articles/print/' + artid
|
||||||
|
|
||||||
def get_article_url(self, article):
|
def get_article_url(self, article):
|
||||||
return article.get('guid', None)
|
return article.get('guid', None)
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
soup.html['xml:lang'] = self.lang
|
soup.html['xml:lang'] = self.lang
|
||||||
soup.html['lang'] = self.lang
|
soup.html['lang'] = self.lang
|
||||||
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
||||||
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
|
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
|
||||||
soup.head.insert(0,mlang)
|
soup.head.insert(0,mlang)
|
||||||
soup.head.insert(1,mcharset)
|
soup.head.insert(1,mcharset)
|
||||||
return self.adeify_images(soup)
|
return self.adeify_images(soup)
|
||||||
|
|
||||||
|
@ -1,77 +1,77 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
www.adventuregamers.com
|
www.adventuregamers.com
|
||||||
'''
|
'''
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class AdventureGamers(BasicNewsRecipe):
|
class AdventureGamers(BasicNewsRecipe):
|
||||||
title = u'Adventure Gamers'
|
title = u'Adventure Gamers'
|
||||||
language = 'en'
|
language = 'en'
|
||||||
|
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Adventure games portal'
|
description = 'Adventure games portal'
|
||||||
publisher = 'Adventure Gamers'
|
publisher = 'Adventure Gamers'
|
||||||
category = 'news, games, adventure, technology'
|
category = 'news, games, adventure, technology'
|
||||||
language = 'en'
|
language = 'en'
|
||||||
|
|
||||||
oldest_article = 10
|
oldest_article = 10
|
||||||
delay = 10
|
delay = 10
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
encoding = 'cp1252'
|
encoding = 'cp1252'
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
INDEX = u'http://www.adventuregamers.com'
|
INDEX = u'http://www.adventuregamers.com'
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment', description
|
'--comment', description
|
||||||
, '--category', category
|
, '--category', category
|
||||||
, '--publisher', publisher
|
, '--publisher', publisher
|
||||||
]
|
]
|
||||||
|
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name='div', attrs={'class':'content_middle'})
|
dict(name='div', attrs={'class':'content_middle'})
|
||||||
]
|
]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name=['object','link','embed','form'])
|
dict(name=['object','link','embed','form'])
|
||||||
,dict(name='div', attrs={'class':['related-stories','article_leadout','prev','next','both']})
|
,dict(name='div', attrs={'class':['related-stories','article_leadout','prev','next','both']})
|
||||||
]
|
]
|
||||||
|
|
||||||
remove_tags_after = [dict(name='div', attrs={'class':'toolbar_fat'})]
|
remove_tags_after = [dict(name='div', attrs={'class':'toolbar_fat'})]
|
||||||
|
|
||||||
feeds = [(u'Articles', u'http://feeds2.feedburner.com/AdventureGamers')]
|
feeds = [(u'Articles', u'http://feeds2.feedburner.com/AdventureGamers')]
|
||||||
|
|
||||||
def get_article_url(self, article):
|
def get_article_url(self, article):
|
||||||
return article.get('guid', None)
|
return article.get('guid', None)
|
||||||
|
|
||||||
def append_page(self, soup, appendtag, position):
|
def append_page(self, soup, appendtag, position):
|
||||||
pager = soup.find('div',attrs={'class':'toolbar_fat_next'})
|
pager = soup.find('div',attrs={'class':'toolbar_fat_next'})
|
||||||
if pager:
|
if pager:
|
||||||
nexturl = self.INDEX + pager.a['href']
|
nexturl = self.INDEX + pager.a['href']
|
||||||
soup2 = self.index_to_soup(nexturl)
|
soup2 = self.index_to_soup(nexturl)
|
||||||
texttag = soup2.find('div', attrs={'class':'bodytext'})
|
texttag = soup2.find('div', attrs={'class':'bodytext'})
|
||||||
for it in texttag.findAll(style=True):
|
for it in texttag.findAll(style=True):
|
||||||
del it['style']
|
del it['style']
|
||||||
newpos = len(texttag.contents)
|
newpos = len(texttag.contents)
|
||||||
self.append_page(soup2,texttag,newpos)
|
self.append_page(soup2,texttag,newpos)
|
||||||
texttag.extract()
|
texttag.extract()
|
||||||
appendtag.insert(position,texttag)
|
appendtag.insert(position,texttag)
|
||||||
|
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
mtag = '<meta http-equiv="Content-Language" content="en-US"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>'
|
mtag = '<meta http-equiv="Content-Language" content="en-US"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>'
|
||||||
soup.head.insert(0,mtag)
|
soup.head.insert(0,mtag)
|
||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
del item['style']
|
del item['style']
|
||||||
self.append_page(soup, soup.body, 3)
|
self.append_page(soup, soup.body, 3)
|
||||||
pager = soup.find('div',attrs={'class':'toolbar_fat'})
|
pager = soup.find('div',attrs={'class':'toolbar_fat'})
|
||||||
if pager:
|
if pager:
|
||||||
pager.extract()
|
pager.extract()
|
||||||
return soup
|
return soup
|
||||||
|
@ -1,62 +1,61 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
ambito.com
|
ambito.com
|
||||||
'''
|
'''
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class Ambito(BasicNewsRecipe):
|
class Ambito(BasicNewsRecipe):
|
||||||
title = 'Ambito.com'
|
title = 'Ambito.com'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Informacion Libre las 24 horas'
|
description = 'Informacion Libre las 24 horas'
|
||||||
publisher = 'Ambito.com'
|
publisher = 'Ambito.com'
|
||||||
category = 'news, politics, Argentina'
|
category = 'news, politics, Argentina'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
encoding = 'iso-8859-1'
|
encoding = 'iso-8859-1'
|
||||||
cover_url = 'http://www.ambito.com/img/logo_.jpg'
|
cover_url = 'http://www.ambito.com/img/logo_.jpg'
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment', description
|
'--comment', description
|
||||||
, '--category', category
|
, '--category', category
|
||||||
, '--publisher', publisher
|
, '--publisher', publisher
|
||||||
]
|
]
|
||||||
|
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'align':'justify'})]
|
keep_only_tags = [dict(name='div', attrs={'align':'justify'})]
|
||||||
|
|
||||||
remove_tags = [dict(name=['object','link'])]
|
remove_tags = [dict(name=['object','link'])]
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Principales Noticias', u'http://www.ambito.com/rss/noticiasp.asp' )
|
(u'Principales Noticias', u'http://www.ambito.com/rss/noticiasp.asp' )
|
||||||
,(u'Economia' , u'http://www.ambito.com/rss/noticias.asp?S=Econom%EDa' )
|
,(u'Economia' , u'http://www.ambito.com/rss/noticias.asp?S=Econom%EDa' )
|
||||||
,(u'Politica' , u'http://www.ambito.com/rss/noticias.asp?S=Pol%EDtica' )
|
,(u'Politica' , u'http://www.ambito.com/rss/noticias.asp?S=Pol%EDtica' )
|
||||||
,(u'Informacion General' , u'http://www.ambito.com/rss/noticias.asp?S=Informaci%F3n%20General')
|
,(u'Informacion General' , u'http://www.ambito.com/rss/noticias.asp?S=Informaci%F3n%20General')
|
||||||
,(u'Agro' , u'http://www.ambito.com/rss/noticias.asp?S=Agro' )
|
,(u'Agro' , u'http://www.ambito.com/rss/noticias.asp?S=Agro' )
|
||||||
,(u'Internacionales' , u'http://www.ambito.com/rss/noticias.asp?S=Internacionales' )
|
,(u'Internacionales' , u'http://www.ambito.com/rss/noticias.asp?S=Internacionales' )
|
||||||
,(u'Deportes' , u'http://www.ambito.com/rss/noticias.asp?S=Deportes' )
|
,(u'Deportes' , u'http://www.ambito.com/rss/noticias.asp?S=Deportes' )
|
||||||
,(u'Espectaculos' , u'http://www.ambito.com/rss/noticias.asp?S=Espect%E1culos' )
|
,(u'Espectaculos' , u'http://www.ambito.com/rss/noticias.asp?S=Espect%E1culos' )
|
||||||
,(u'Tecnologia' , u'http://www.ambito.com/rss/noticias.asp?S=Tecnologia' )
|
,(u'Tecnologia' , u'http://www.ambito.com/rss/noticias.asp?S=Tecnologia' )
|
||||||
,(u'Salud' , u'http://www.ambito.com/rss/noticias.asp?S=Salud' )
|
,(u'Salud' , u'http://www.ambito.com/rss/noticias.asp?S=Salud' )
|
||||||
,(u'Ambito Nacional' , u'http://www.ambito.com/rss/noticias.asp?S=Ambito%20Nacional' )
|
,(u'Ambito Nacional' , u'http://www.ambito.com/rss/noticias.asp?S=Ambito%20Nacional' )
|
||||||
]
|
]
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
return url.replace('http://www.ambito.com/noticia.asp?','http://www.ambito.com/noticias/imprimir.asp?')
|
return url.replace('http://www.ambito.com/noticia.asp?','http://www.ambito.com/noticias/imprimir.asp?')
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
mtag = '<meta http-equiv="Content-Language" content="es-AR"/>'
|
mtag = '<meta http-equiv="Content-Language" content="es-AR"/>'
|
||||||
soup.head.insert(0,mtag)
|
soup.head.insert(0,mtag)
|
||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
del item['style']
|
del item['style']
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
language = 'es'
|
language = 'es'
|
||||||
|
|
@ -1,55 +1,55 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
spectator.org
|
spectator.org
|
||||||
'''
|
'''
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class TheAmericanSpectator(BasicNewsRecipe):
|
class TheAmericanSpectator(BasicNewsRecipe):
|
||||||
title = 'The American Spectator'
|
title = 'The American Spectator'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
language = 'en'
|
language = 'en'
|
||||||
|
|
||||||
description = 'News from USA'
|
description = 'News from USA'
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
INDEX = 'http://spectator.org'
|
INDEX = 'http://spectator.org'
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment' , description
|
'--comment' , description
|
||||||
, '--category' , 'news, politics, USA'
|
, '--category' , 'news, politics, USA'
|
||||||
, '--publisher' , title
|
, '--publisher' , title
|
||||||
]
|
]
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name='div', attrs={'class':'post inner'})
|
dict(name='div', attrs={'class':'post inner'})
|
||||||
,dict(name='div', attrs={'class':'author-bio'})
|
,dict(name='div', attrs={'class':'author-bio'})
|
||||||
]
|
]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='object')
|
dict(name='object')
|
||||||
,dict(name='div', attrs={'class':'col3' })
|
,dict(name='div', attrs={'class':'col3' })
|
||||||
,dict(name='div', attrs={'class':'post-options' })
|
,dict(name='div', attrs={'class':'post-options' })
|
||||||
,dict(name='p' , attrs={'class':'letter-editor'})
|
,dict(name='p' , attrs={'class':'letter-editor'})
|
||||||
,dict(name='div', attrs={'class':'social' })
|
,dict(name='div', attrs={'class':'social' })
|
||||||
]
|
]
|
||||||
|
|
||||||
feeds = [ (u'Articles', u'http://feedproxy.google.com/amspecarticles')]
|
feeds = [ (u'Articles', u'http://feedproxy.google.com/amspecarticles')]
|
||||||
|
|
||||||
def get_cover_url(self):
|
def get_cover_url(self):
|
||||||
cover_url = None
|
cover_url = None
|
||||||
soup = self.index_to_soup(self.INDEX)
|
soup = self.index_to_soup(self.INDEX)
|
||||||
link_item = soup.find('a',attrs={'class':'cover'})
|
link_item = soup.find('a',attrs={'class':'cover'})
|
||||||
if link_item:
|
if link_item:
|
||||||
soup2 = self.index_to_soup(link_item['href'])
|
soup2 = self.index_to_soup(link_item['href'])
|
||||||
link_item2 = soup2.find('div',attrs={'class':'post inner issues'})
|
link_item2 = soup2.find('div',attrs={'class':'post inner issues'})
|
||||||
cover_url = self.INDEX + link_item2.img['src']
|
cover_url = self.INDEX + link_item2.img['src']
|
||||||
return cover_url
|
return cover_url
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
return url + '/print'
|
return url + '/print'
|
||||||
|
@ -1,62 +1,62 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
axxon.com.ar
|
axxon.com.ar
|
||||||
'''
|
'''
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import Tag
|
from calibre.ebooks.BeautifulSoup import Tag
|
||||||
|
|
||||||
class Axxon_news(BasicNewsRecipe):
|
class Axxon_news(BasicNewsRecipe):
|
||||||
title = 'Axxon noticias'
|
title = 'Axxon noticias'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Axxon, Ciencia Ficcion en Bits'
|
description = 'Axxon, Ciencia Ficcion en Bits'
|
||||||
publisher = 'Axxon'
|
publisher = 'Axxon'
|
||||||
category = 'news, SF, Argentina, science, movies'
|
category = 'news, SF, Argentina, science, movies'
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = False
|
no_stylesheets = False
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
language = 'es'
|
language = 'es'
|
||||||
|
|
||||||
lang = 'es-AR'
|
lang = 'es-AR'
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comment' : description
|
'comment' : description
|
||||||
, 'tags' : category
|
, 'tags' : category
|
||||||
, 'publisher' : publisher
|
, 'publisher' : publisher
|
||||||
, 'language' : lang
|
, 'language' : lang
|
||||||
, 'pretty_print' : True
|
, 'pretty_print' : True
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'class':'post'})]
|
keep_only_tags = [dict(name='div', attrs={'class':'post'})]
|
||||||
|
|
||||||
remove_tags = [dict(name=['object','link','iframe','embed'])]
|
remove_tags = [dict(name=['object','link','iframe','embed'])]
|
||||||
|
|
||||||
feeds = [(u'Noticias', u'http://axxon.com.ar/noticias/feed/')]
|
feeds = [(u'Noticias', u'http://axxon.com.ar/noticias/feed/')]
|
||||||
|
|
||||||
remove_attributes = ['style','width','height','font','border','align']
|
remove_attributes = ['style','width','height','font','border','align']
|
||||||
|
|
||||||
|
|
||||||
def adeify_images2(cls, soup):
|
def adeify_images2(cls, soup):
|
||||||
for item in soup.findAll('img'):
|
for item in soup.findAll('img'):
|
||||||
for attrib in ['height','width','border','align','style']:
|
for attrib in ['height','width','border','align','style']:
|
||||||
if item.has_key(attrib):
|
if item.has_key(attrib):
|
||||||
del item[attrib]
|
del item[attrib]
|
||||||
oldParent = item.parent
|
oldParent = item.parent
|
||||||
if oldParent.name == 'a':
|
if oldParent.name == 'a':
|
||||||
oldParent.name == 'p'
|
oldParent.name == 'p'
|
||||||
myIndex = oldParent.contents.index(item)
|
myIndex = oldParent.contents.index(item)
|
||||||
brtag = Tag(soup,'br')
|
brtag = Tag(soup,'br')
|
||||||
oldParent.insert(myIndex+1,brtag)
|
oldParent.insert(myIndex+1,brtag)
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
soup.html['xml:lang'] = self.lang
|
soup.html['xml:lang'] = self.lang
|
||||||
soup.html['lang'] = self.lang
|
soup.html['lang'] = self.lang
|
||||||
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
||||||
soup.html.insert(0,mlang)
|
soup.html.insert(0,mlang)
|
||||||
return self.adeify_images2(soup)
|
return self.adeify_images2(soup)
|
||||||
|
|
||||||
|
@ -1,65 +1,65 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
www.azstarnet.com
|
www.azstarnet.com
|
||||||
'''
|
'''
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class Azstarnet(BasicNewsRecipe):
|
class Azstarnet(BasicNewsRecipe):
|
||||||
title = 'Arizona Daily Star'
|
title = 'Arizona Daily Star'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'news from Arizona'
|
description = 'news from Arizona'
|
||||||
language = 'en'
|
language = 'en'
|
||||||
|
|
||||||
publisher = 'azstarnet.com'
|
publisher = 'azstarnet.com'
|
||||||
category = 'news, politics, Arizona, USA'
|
category = 'news, politics, Arizona, USA'
|
||||||
delay = 1
|
delay = 1
|
||||||
oldest_article = 1
|
oldest_article = 1
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
needs_subscription = True
|
needs_subscription = True
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment', description
|
'--comment', description
|
||||||
, '--category', category
|
, '--category', category
|
||||||
, '--publisher', publisher
|
, '--publisher', publisher
|
||||||
]
|
]
|
||||||
|
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||||
|
|
||||||
def get_browser(self):
|
def get_browser(self):
|
||||||
br = BasicNewsRecipe.get_browser()
|
br = BasicNewsRecipe.get_browser()
|
||||||
if self.username is not None and self.password is not None:
|
if self.username is not None and self.password is not None:
|
||||||
br.open('http://azstarnet.com/registration/retro.php')
|
br.open('http://azstarnet.com/registration/retro.php')
|
||||||
br.select_form(nr=1)
|
br.select_form(nr=1)
|
||||||
br['email'] = self.username
|
br['email'] = self.username
|
||||||
br['pass' ] = self.password
|
br['pass' ] = self.password
|
||||||
br.submit()
|
br.submit()
|
||||||
return br
|
return br
|
||||||
|
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'id':'storycontent'})]
|
keep_only_tags = [dict(name='div', attrs={'id':'storycontent'})]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name=['object','link','iframe','base','img'])
|
dict(name=['object','link','iframe','base','img'])
|
||||||
,dict(name='div',attrs={'class':'bannerinstory'})
|
,dict(name='div',attrs={'class':'bannerinstory'})
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
feeds = [(u'Tucson Region', u'http://rss.azstarnet.com/index.php?site=metro')]
|
feeds = [(u'Tucson Region', u'http://rss.azstarnet.com/index.php?site=metro')]
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
soup.html['dir' ] = 'ltr'
|
soup.html['dir' ] = 'ltr'
|
||||||
soup.html['lang'] = 'en-US'
|
soup.html['lang'] = 'en-US'
|
||||||
mtag = '\n<meta http-equiv="Content-Language" content="en-US"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">\n'
|
mtag = '\n<meta http-equiv="Content-Language" content="en-US"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">\n'
|
||||||
soup.head.insert(0,mtag)
|
soup.head.insert(0,mtag)
|
||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
del item['style']
|
del item['style']
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
|
@ -1,69 +1,69 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
b92.net
|
b92.net
|
||||||
'''
|
'''
|
||||||
import re
|
import re
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class B92(BasicNewsRecipe):
|
class B92(BasicNewsRecipe):
|
||||||
title = 'B92'
|
title = 'B92'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Dnevne vesti iz Srbije i sveta'
|
description = 'Dnevne vesti iz Srbije i sveta'
|
||||||
publisher = 'B92'
|
publisher = 'B92'
|
||||||
category = 'news, politics, Serbia'
|
category = 'news, politics, Serbia'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = 'cp1250'
|
encoding = 'cp1250'
|
||||||
language = 'sr'
|
language = 'sr'
|
||||||
|
|
||||||
lang = 'sr-Latn-RS'
|
lang = 'sr-Latn-RS'
|
||||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
|
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comment' : description
|
'comment' : description
|
||||||
, 'tags' : category
|
, 'tags' : category
|
||||||
, 'publisher' : publisher
|
, 'publisher' : publisher
|
||||||
, 'language' : lang
|
, 'language' : lang
|
||||||
}
|
}
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
|
|
||||||
keep_only_tags = [dict(name='table', attrs={'class':'maindocument'})]
|
keep_only_tags = [dict(name='table', attrs={'class':'maindocument'})]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='ul', attrs={'class':'comment-nav'})
|
dict(name='ul', attrs={'class':'comment-nav'})
|
||||||
,dict(name=['embed','link','base'] )
|
,dict(name=['embed','link','base'] )
|
||||||
,dict(name='div', attrs={'class':'udokum'} )
|
,dict(name='div', attrs={'class':'udokum'} )
|
||||||
]
|
]
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Vesti', u'http://www.b92.net/info/rss/vesti.xml')
|
(u'Vesti', u'http://www.b92.net/info/rss/vesti.xml')
|
||||||
,(u'Biz' , u'http://www.b92.net/info/rss/biz.xml' )
|
,(u'Biz' , u'http://www.b92.net/info/rss/biz.xml' )
|
||||||
]
|
]
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
return url + '&version=print'
|
return url + '&version=print'
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
del soup.body['onload']
|
del soup.body['onload']
|
||||||
for item in soup.findAll('font'):
|
for item in soup.findAll('font'):
|
||||||
item.name='div'
|
item.name='div'
|
||||||
if item.has_key('size'):
|
if item.has_key('size'):
|
||||||
del item['size']
|
del item['size']
|
||||||
attribs = [ 'style','font','valign'
|
attribs = [ 'style','font','valign'
|
||||||
,'colspan','width','height'
|
,'colspan','width','height'
|
||||||
,'rowspan','summary','align'
|
,'rowspan','summary','align'
|
||||||
,'cellspacing','cellpadding'
|
,'cellspacing','cellpadding'
|
||||||
,'frames','rules','border'
|
,'frames','rules','border'
|
||||||
]
|
]
|
||||||
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
|
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
|
||||||
item.name = 'div'
|
item.name = 'div'
|
||||||
for attrib in attribs:
|
for attrib in attribs:
|
||||||
if item.has_key(attrib):
|
if item.has_key(attrib):
|
||||||
del item[attrib]
|
del item[attrib]
|
||||||
return soup
|
return soup
|
||||||
|
@ -1,93 +1,93 @@
|
|||||||
##
|
##
|
||||||
## web2lrf profile to download articles from Barrons.com
|
## web2lrf profile to download articles from Barrons.com
|
||||||
## can download subscriber-only content if username and
|
## can download subscriber-only content if username and
|
||||||
## password are supplied.
|
## password are supplied.
|
||||||
##
|
##
|
||||||
'''
|
'''
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class Barrons(BasicNewsRecipe):
|
class Barrons(BasicNewsRecipe):
|
||||||
|
|
||||||
title = 'Barron\'s'
|
title = 'Barron\'s'
|
||||||
max_articles_per_feed = 50
|
max_articles_per_feed = 50
|
||||||
needs_subscription = True
|
needs_subscription = True
|
||||||
language = 'en'
|
language = 'en'
|
||||||
|
|
||||||
__author__ = 'Kovid Goyal'
|
__author__ = 'Kovid Goyal'
|
||||||
description = 'Weekly publication for investors from the publisher of the Wall Street Journal'
|
description = 'Weekly publication for investors from the publisher of the Wall Street Journal'
|
||||||
timefmt = ' [%a, %b %d, %Y]'
|
timefmt = ' [%a, %b %d, %Y]'
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
no_stylesheets = False
|
no_stylesheets = False
|
||||||
match_regexps = ['http://online.barrons.com/.*?html\?mod=.*?|file:.*']
|
match_regexps = ['http://online.barrons.com/.*?html\?mod=.*?|file:.*']
|
||||||
conversion_options = {'linearize_tables': True}
|
conversion_options = {'linearize_tables': True}
|
||||||
##delay = 1
|
##delay = 1
|
||||||
|
|
||||||
## Don't grab articles more than 7 days old
|
## Don't grab articles more than 7 days old
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
|
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
|
preprocess_regexps = [(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
|
||||||
[
|
[
|
||||||
## Remove anything before the body of the article.
|
## Remove anything before the body of the article.
|
||||||
(r'<body.*?<!-- article start', lambda match: '<body><!-- article start'),
|
(r'<body.*?<!-- article start', lambda match: '<body><!-- article start'),
|
||||||
|
|
||||||
## Remove any insets from the body of the article.
|
## Remove any insets from the body of the article.
|
||||||
(r'<div id="inset".*?</div>.?</div>.?<p', lambda match : '<p'),
|
(r'<div id="inset".*?</div>.?</div>.?<p', lambda match : '<p'),
|
||||||
|
|
||||||
## Remove any reprint info from the body of the article.
|
## Remove any reprint info from the body of the article.
|
||||||
(r'<hr size.*?<p', lambda match : '<p'),
|
(r'<hr size.*?<p', lambda match : '<p'),
|
||||||
|
|
||||||
## Remove anything after the end of the article.
|
## Remove anything after the end of the article.
|
||||||
(r'<!-- article end.*?</body>', lambda match : '</body>'),
|
(r'<!-- article end.*?</body>', lambda match : '</body>'),
|
||||||
]
|
]
|
||||||
]
|
]
|
||||||
|
|
||||||
def get_browser(self):
|
def get_browser(self):
|
||||||
br = BasicNewsRecipe.get_browser()
|
br = BasicNewsRecipe.get_browser()
|
||||||
if self.username is not None and self.password is not None:
|
if self.username is not None and self.password is not None:
|
||||||
br.open('http://commerce.barrons.com/auth/login')
|
br.open('http://commerce.barrons.com/auth/login')
|
||||||
br.select_form(name='login_form')
|
br.select_form(name='login_form')
|
||||||
br['user'] = self.username
|
br['user'] = self.username
|
||||||
br['password'] = self.password
|
br['password'] = self.password
|
||||||
br.submit()
|
br.submit()
|
||||||
return br
|
return br
|
||||||
|
|
||||||
## Use the print version of a page when available.
|
## Use the print version of a page when available.
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
return url.replace('/article/', '/article_print/')
|
return url.replace('/article/', '/article_print/')
|
||||||
|
|
||||||
## Comment out the feeds you don't want retrieved.
|
## Comment out the feeds you don't want retrieved.
|
||||||
## Because these feeds are sorted alphabetically when converted to LRF, you may want to number them to put them in the order you desire
|
## Because these feeds are sorted alphabetically when converted to LRF, you may want to number them to put them in the order you desire
|
||||||
|
|
||||||
def get_feeds(self):
|
def get_feeds(self):
|
||||||
return [
|
return [
|
||||||
('This Week\'s Magazine', 'http://online.barrons.com/xml/rss/3_7510.xml'),
|
('This Week\'s Magazine', 'http://online.barrons.com/xml/rss/3_7510.xml'),
|
||||||
('Online Exclusives', 'http://online.barrons.com/xml/rss/3_7515.xml'),
|
('Online Exclusives', 'http://online.barrons.com/xml/rss/3_7515.xml'),
|
||||||
('Companies', 'http://online.barrons.com/xml/rss/3_7516.xml'),
|
('Companies', 'http://online.barrons.com/xml/rss/3_7516.xml'),
|
||||||
('Markets', 'http://online.barrons.com/xml/rss/3_7517.xml'),
|
('Markets', 'http://online.barrons.com/xml/rss/3_7517.xml'),
|
||||||
('Technology', 'http://online.barrons.com/xml/rss/3_7518.xml'),
|
('Technology', 'http://online.barrons.com/xml/rss/3_7518.xml'),
|
||||||
('Funds/Q&A', 'http://online.barrons.com/xml/rss/3_7519.xml'),
|
('Funds/Q&A', 'http://online.barrons.com/xml/rss/3_7519.xml'),
|
||||||
]
|
]
|
||||||
|
|
||||||
## Logout of website
|
## Logout of website
|
||||||
## NOT CURRENTLY WORKING
|
## NOT CURRENTLY WORKING
|
||||||
# def cleanup(self):
|
# def cleanup(self):
|
||||||
# try:
|
# try:
|
||||||
# self.browser.set_debug_responses(True)
|
# self.browser.set_debug_responses(True)
|
||||||
# import sys, logging
|
# import sys, logging
|
||||||
# logger = logging.getLogger("mechanize")
|
# logger = logging.getLogger("mechanize")
|
||||||
# logger.addHandler(logging.StreamHandler(sys.stdout))
|
# logger.addHandler(logging.StreamHandler(sys.stdout))
|
||||||
# logger.setLevel(logging.INFO)
|
# logger.setLevel(logging.INFO)
|
||||||
|
|
||||||
# res = self.browser.open('http://online.barrons.com/logout')
|
# res = self.browser.open('http://online.barrons.com/logout')
|
||||||
# except:
|
# except:
|
||||||
# import traceback
|
# import traceback
|
||||||
# traceback.print_exc()
|
# traceback.print_exc()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,35 +1,35 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, Huan Komrade T <huantnh at gmail.com>'
|
__copyright__ = '2009, Huan Komrade T <huantnh at gmail.com>'
|
||||||
'''
|
'''
|
||||||
bbc.co.uk
|
bbc.co.uk
|
||||||
'''
|
'''
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class BBCVietnamese(BasicNewsRecipe):
|
class BBCVietnamese(BasicNewsRecipe):
|
||||||
title = u'BBC Vietnamese'
|
title = u'BBC Vietnamese'
|
||||||
__author__ = 'Huan Komrade T'
|
__author__ = 'Huan Komrade T'
|
||||||
description = 'Vietnam news and current affairs from the British Broadcasting Corporation'
|
description = 'Vietnam news and current affairs from the British Broadcasting Corporation'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
language = 'vi'
|
language = 'vi'
|
||||||
|
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
recursions = 0
|
recursions = 0
|
||||||
|
|
||||||
remove_tags = [dict(name='div', attrs={'class':'footer'})]
|
remove_tags = [dict(name='div', attrs={'class':'footer'})]
|
||||||
extra_css = '.headline {font-size: x-large;} \n .fact { padding-top: 10pt }'
|
extra_css = '.headline {font-size: x-large;} \n .fact { padding-top: 10pt }'
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
('Index', 'http://www.bbc.co.uk/vietnamese/index.xml'),
|
('Index', 'http://www.bbc.co.uk/vietnamese/index.xml'),
|
||||||
('Vietnam', 'http://www.bbc.co.uk/vietnamese/vietnam/index.xml'),
|
('Vietnam', 'http://www.bbc.co.uk/vietnamese/vietnam/index.xml'),
|
||||||
('Business', 'http://www.bbc.co.uk/vietnamese/business/index.xml'),
|
('Business', 'http://www.bbc.co.uk/vietnamese/business/index.xml'),
|
||||||
('Culture', 'http://www.bbc.co.uk/vietnamese/culture/index.xml'),
|
('Culture', 'http://www.bbc.co.uk/vietnamese/culture/index.xml'),
|
||||||
('Football', 'http://www.bbc.co.uk/vietnamese/football/index.xml'),
|
('Football', 'http://www.bbc.co.uk/vietnamese/football/index.xml'),
|
||||||
('Forum', 'http://www.bbc.co.uk/vietnamese/forum/index.xml'),
|
('Forum', 'http://www.bbc.co.uk/vietnamese/forum/index.xml'),
|
||||||
('In Depth', 'http://www.bbc.co.uk/vietnamese/indepth/index.xml'),
|
('In Depth', 'http://www.bbc.co.uk/vietnamese/indepth/index.xml'),
|
||||||
]
|
]
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
return url.replace('http://www.bbc.co.uk/vietnamese/', 'http://www.bbc.co.uk/vietnamese/lg/')
|
return url.replace('http://www.bbc.co.uk/vietnamese/', 'http://www.bbc.co.uk/vietnamese/lg/')
|
||||||
|
@ -1,51 +1,51 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
beta.rs
|
beta.rs
|
||||||
'''
|
'''
|
||||||
import re
|
import re
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
from calibre.ebooks.BeautifulSoup import Tag
|
||||||
|
|
||||||
class Danas(BasicNewsRecipe):
|
class Danas(BasicNewsRecipe):
|
||||||
title = 'BETA'
|
title = 'BETA'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Novinska Agencija'
|
description = 'Novinska Agencija'
|
||||||
publisher = 'Beta'
|
publisher = 'Beta'
|
||||||
category = 'news, politics, Serbia'
|
category = 'news, politics, Serbia'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = False
|
no_stylesheets = False
|
||||||
use_embedded_content = True
|
use_embedded_content = True
|
||||||
language = 'sr'
|
language = 'sr'
|
||||||
|
|
||||||
lang = 'sr-Latn-RS'
|
lang = 'sr-Latn-RS'
|
||||||
direction = 'ltr'
|
direction = 'ltr'
|
||||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
|
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comment' : description
|
'comment' : description
|
||||||
, 'tags' : category
|
, 'tags' : category
|
||||||
, 'publisher' : publisher
|
, 'publisher' : publisher
|
||||||
, 'language' : lang
|
, 'language' : lang
|
||||||
, 'pretty_print' : True
|
, 'pretty_print' : True
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Vesti dana', u'http://www.beta.rs/rssvd.asp')
|
(u'Vesti dana', u'http://www.beta.rs/rssvd.asp')
|
||||||
,(u'Ekonomija' , u'http://www.beta.rs/rssek.asp')
|
,(u'Ekonomija' , u'http://www.beta.rs/rssek.asp')
|
||||||
,(u'Sport' , u'http://www.beta.rs/rsssp.asp')
|
,(u'Sport' , u'http://www.beta.rs/rsssp.asp')
|
||||||
]
|
]
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
soup.html['lang'] = self.lang
|
soup.html['lang'] = self.lang
|
||||||
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
||||||
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
|
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
|
||||||
soup.head.insert(0,mlang)
|
soup.head.insert(0,mlang)
|
||||||
soup.head.insert(1,mcharset)
|
soup.head.insert(1,mcharset)
|
||||||
return self.adeify_images(soup)
|
return self.adeify_images(soup)
|
||||||
|
@ -1,38 +1,37 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
beta.rs
|
beta.rs
|
||||||
'''
|
'''
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
|
||||||
|
class Danas(BasicNewsRecipe):
|
||||||
class Danas(BasicNewsRecipe):
|
title = 'BETA - English'
|
||||||
title = 'BETA - English'
|
__author__ = 'Darko Miletic'
|
||||||
__author__ = 'Darko Miletic'
|
description = 'Serbian news agency'
|
||||||
description = 'Serbian news agency'
|
publisher = 'Beta'
|
||||||
publisher = 'Beta'
|
category = 'news, politics, Serbia'
|
||||||
category = 'news, politics, Serbia'
|
oldest_article = 2
|
||||||
oldest_article = 2
|
max_articles_per_feed = 100
|
||||||
max_articles_per_feed = 100
|
no_stylesheets = False
|
||||||
no_stylesheets = False
|
use_embedded_content = True
|
||||||
use_embedded_content = True
|
|
||||||
language = 'en'
|
language = 'en'
|
||||||
|
|
||||||
lang = 'en'
|
lang = 'en'
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comment' : description
|
'comment' : description
|
||||||
, 'tags' : category
|
, 'tags' : category
|
||||||
, 'publisher' : publisher
|
, 'publisher' : publisher
|
||||||
, 'language' : lang
|
, 'language' : lang
|
||||||
, 'pretty_print' : True
|
, 'pretty_print' : True
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
feeds = [(u'News', u'http://www.beta.rs/rssen.asp')]
|
feeds = [(u'News', u'http://www.beta.rs/rssen.asp')]
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
return self.adeify_images(soup)
|
return self.adeify_images(soup)
|
||||||
|
@ -1,66 +1,65 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
blic.rs
|
blic.rs
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import re
|
import re
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
|
||||||
|
class Blic(BasicNewsRecipe):
|
||||||
class Blic(BasicNewsRecipe):
|
title = 'Blic'
|
||||||
title = 'Blic'
|
__author__ = 'Darko Miletic'
|
||||||
__author__ = 'Darko Miletic'
|
description = 'Blic.co.yu online verzija najtiraznije novine u Srbiji donosi najnovije vesti iz Srbije i sveta, komentare, politicke analize, poslovne i ekonomske vesti, vesti iz regiona, intervjue, informacije iz kulture, reportaze, pokriva sve sportske dogadjaje, detaljan tv program, nagradne igre, zabavu, fenomenalni Blic strip, dnevni horoskop, arhivu svih dogadjaja'
|
||||||
description = 'Blic.co.yu online verzija najtiraznije novine u Srbiji donosi najnovije vesti iz Srbije i sveta, komentare, politicke analize, poslovne i ekonomske vesti, vesti iz regiona, intervjue, informacije iz kulture, reportaze, pokriva sve sportske dogadjaje, detaljan tv program, nagradne igre, zabavu, fenomenalni Blic strip, dnevni horoskop, arhivu svih dogadjaja'
|
publisher = 'RINGIER d.o.o.'
|
||||||
publisher = 'RINGIER d.o.o.'
|
category = 'news, politics, Serbia'
|
||||||
category = 'news, politics, Serbia'
|
delay = 1
|
||||||
delay = 1
|
oldest_article = 2
|
||||||
oldest_article = 2
|
max_articles_per_feed = 100
|
||||||
max_articles_per_feed = 100
|
remove_javascript = True
|
||||||
remove_javascript = True
|
no_stylesheets = True
|
||||||
no_stylesheets = True
|
use_embedded_content = False
|
||||||
use_embedded_content = False
|
|
||||||
language = 'sr'
|
language = 'sr'
|
||||||
|
|
||||||
lang = 'sr-Latn-RS'
|
lang = 'sr-Latn-RS'
|
||||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif} '
|
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif} '
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comment' : description
|
'comment' : description
|
||||||
, 'tags' : category
|
, 'tags' : category
|
||||||
, 'publisher' : publisher
|
, 'publisher' : publisher
|
||||||
, 'language' : lang
|
, 'language' : lang
|
||||||
}
|
}
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'class':'single_news'})]
|
keep_only_tags = [dict(name='div', attrs={'class':'single_news'})]
|
||||||
|
|
||||||
feeds = [(u'Vesti', u'http://www.blic.rs/rssall.php')]
|
feeds = [(u'Vesti', u'http://www.blic.rs/rssall.php')]
|
||||||
|
|
||||||
remove_tags = [dict(name=['object','link'])]
|
remove_tags = [dict(name=['object','link'])]
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
rest_url = url.partition('?')[2]
|
rest_url = url.partition('?')[2]
|
||||||
return u'http://www.blic.rs/_print.php?' + rest_url
|
return u'http://www.blic.rs/_print.php?' + rest_url
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
attribs = [ 'style','font','valign'
|
attribs = [ 'style','font','valign'
|
||||||
,'colspan','width','height'
|
,'colspan','width','height'
|
||||||
,'rowspan','summary','align'
|
,'rowspan','summary','align'
|
||||||
,'cellspacing','cellpadding'
|
,'cellspacing','cellpadding'
|
||||||
,'frames','rules','border'
|
,'frames','rules','border'
|
||||||
]
|
]
|
||||||
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
|
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
|
||||||
item.name = 'div'
|
item.name = 'div'
|
||||||
for attrib in attribs:
|
for attrib in attribs:
|
||||||
if item.has_key(attrib):
|
if item.has_key(attrib):
|
||||||
del item[attrib]
|
del item[attrib]
|
||||||
return self.adeify_images(soup)
|
return self.adeify_images(soup)
|
||||||
|
|
||||||
def get_article_url(self, article):
|
def get_article_url(self, article):
|
||||||
raw = article.get('link', None)
|
raw = article.get('link', None)
|
||||||
return raw.replace('.co.yu','.rs')
|
return raw.replace('.co.yu','.rs')
|
||||||
|
|
||||||
|
@ -1,95 +1,95 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
|
||||||
'''
|
'''
|
||||||
borba.rs
|
borba.rs
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import re
|
import re
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class Borba(BasicNewsRecipe):
|
class Borba(BasicNewsRecipe):
|
||||||
title = 'Borba Online'
|
title = 'Borba Online'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Dnevne novine Borba Online'
|
description = 'Dnevne novine Borba Online'
|
||||||
publisher = 'IP Novine Borba'
|
publisher = 'IP Novine Borba'
|
||||||
category = 'news, politics, Serbia'
|
category = 'news, politics, Serbia'
|
||||||
language = 'sr'
|
language = 'sr'
|
||||||
|
|
||||||
lang = _('sr-Latn-RS')
|
lang = _('sr-Latn-RS')
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
cover_url = 'http://www.borba.rs/images/stories/novine/naslovna_v.jpg'
|
cover_url = 'http://www.borba.rs/images/stories/novine/naslovna_v.jpg'
|
||||||
INDEX = u'http://www.borba.rs/'
|
INDEX = u'http://www.borba.rs/'
|
||||||
extra_css = ' @font-face {font-family: "serif1"; src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif} .contentheading{font-size: x-large; font-weight: bold} .createdate{font-size: small; font-weight: bold} '
|
extra_css = ' @font-face {font-family: "serif1"; src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif} .contentheading{font-size: x-large; font-weight: bold} .createdate{font-size: small; font-weight: bold} '
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comment' : description
|
'comment' : description
|
||||||
, 'tags' : category
|
, 'tags' : category
|
||||||
, 'publisher' : publisher
|
, 'publisher' : publisher
|
||||||
, 'language' : lang
|
, 'language' : lang
|
||||||
, 'pretty_print' : True
|
, 'pretty_print' : True
|
||||||
}
|
}
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'class':'main'})]
|
keep_only_tags = [dict(name='div', attrs={'class':'main'})]
|
||||||
|
|
||||||
remove_tags_after = dict(name='div',attrs={'id':'written_comments_title'})
|
remove_tags_after = dict(name='div',attrs={'id':'written_comments_title'})
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name=['object','link','iframe','base','img'])
|
dict(name=['object','link','iframe','base','img'])
|
||||||
,dict(name='div',attrs={'id':'written_comments_title'})
|
,dict(name='div',attrs={'id':'written_comments_title'})
|
||||||
]
|
]
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Najnovije vesti', u'http://www.borba.rs/content/blogsection/28/105/')
|
(u'Najnovije vesti', u'http://www.borba.rs/content/blogsection/28/105/')
|
||||||
,(u'Prvi plan' , u'http://www.borba.rs/content/blogsection/4/92/' )
|
,(u'Prvi plan' , u'http://www.borba.rs/content/blogsection/4/92/' )
|
||||||
,(u'Dogadjaji' , u'http://www.borba.rs/content/blogsection/21/83/' )
|
,(u'Dogadjaji' , u'http://www.borba.rs/content/blogsection/21/83/' )
|
||||||
,(u'Ekonomija' , u'http://www.borba.rs/content/blogsection/5/35/' )
|
,(u'Ekonomija' , u'http://www.borba.rs/content/blogsection/5/35/' )
|
||||||
,(u'Komentari' , u'http://www.borba.rs/content/blogsection/23/94/' )
|
,(u'Komentari' , u'http://www.borba.rs/content/blogsection/23/94/' )
|
||||||
,(u'Svet' , u'http://www.borba.rs/content/blogsection/7/36/' )
|
,(u'Svet' , u'http://www.borba.rs/content/blogsection/7/36/' )
|
||||||
,(u'Sport' , u'http://www.borba.rs/content/blogsection/6/37/' )
|
,(u'Sport' , u'http://www.borba.rs/content/blogsection/6/37/' )
|
||||||
,(u'Fama' , u'http://www.borba.rs/content/blogsection/25/89/' )
|
,(u'Fama' , u'http://www.borba.rs/content/blogsection/25/89/' )
|
||||||
,(u'B2 Dodatak' , u'http://www.borba.rs/content/blogsection/30/116/')
|
,(u'B2 Dodatak' , u'http://www.borba.rs/content/blogsection/30/116/')
|
||||||
]
|
]
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
attribs = [ 'style','font','valign'
|
attribs = [ 'style','font','valign'
|
||||||
,'colspan','width','height'
|
,'colspan','width','height'
|
||||||
,'rowspan','summary','align'
|
,'rowspan','summary','align'
|
||||||
,'cellspacing','cellpadding'
|
,'cellspacing','cellpadding'
|
||||||
,'frames','rules','border'
|
,'frames','rules','border'
|
||||||
]
|
]
|
||||||
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
|
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
|
||||||
item.name = 'div'
|
item.name = 'div'
|
||||||
for attrib in attribs:
|
for attrib in attribs:
|
||||||
if item.has_key(attrib):
|
if item.has_key(attrib):
|
||||||
del item[attrib]
|
del item[attrib]
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
totalfeeds = []
|
totalfeeds = []
|
||||||
lfeeds = self.get_feeds()
|
lfeeds = self.get_feeds()
|
||||||
for feedobj in lfeeds:
|
for feedobj in lfeeds:
|
||||||
feedtitle, feedurl = feedobj
|
feedtitle, feedurl = feedobj
|
||||||
self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
|
self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
|
||||||
articles = []
|
articles = []
|
||||||
soup = self.index_to_soup(feedurl)
|
soup = self.index_to_soup(feedurl)
|
||||||
for item in soup.findAll('a', attrs={'class':'contentpagetitle'}):
|
for item in soup.findAll('a', attrs={'class':'contentpagetitle'}):
|
||||||
url = item['href']
|
url = item['href']
|
||||||
title = self.tag_to_string(item)
|
title = self.tag_to_string(item)
|
||||||
articles.append({
|
articles.append({
|
||||||
'title' :title
|
'title' :title
|
||||||
,'date' :''
|
,'date' :''
|
||||||
,'url' :url
|
,'url' :url
|
||||||
,'description':''
|
,'description':''
|
||||||
})
|
})
|
||||||
totalfeeds.append((feedtitle, articles))
|
totalfeeds.append((feedtitle, articles))
|
||||||
return totalfeeds
|
return totalfeeds
|
||||||
|
|
||||||
|
@ -1,72 +1,72 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
elargentino.com
|
elargentino.com
|
||||||
'''
|
'''
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import Tag
|
from calibre.ebooks.BeautifulSoup import Tag
|
||||||
|
|
||||||
class BsAsEconomico(BasicNewsRecipe):
|
class BsAsEconomico(BasicNewsRecipe):
|
||||||
title = 'Buenos Aires Economico'
|
title = 'Buenos Aires Economico'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Revista Argentina'
|
description = 'Revista Argentina'
|
||||||
publisher = 'ElArgentino.com'
|
publisher = 'ElArgentino.com'
|
||||||
category = 'news, politics, economy, Argentina'
|
category = 'news, politics, economy, Argentina'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
language = 'es'
|
language = 'es'
|
||||||
|
|
||||||
lang = 'es-AR'
|
lang = 'es-AR'
|
||||||
direction = 'ltr'
|
direction = 'ltr'
|
||||||
INDEX = 'http://www.elargentino.com/medios/121/Buenos-Aires-Economico.html'
|
INDEX = 'http://www.elargentino.com/medios/121/Buenos-Aires-Economico.html'
|
||||||
extra_css = ' .titulo{font-size: x-large; font-weight: bold} .volantaImp{font-size: small; font-weight: bold} '
|
extra_css = ' .titulo{font-size: x-large; font-weight: bold} .volantaImp{font-size: small; font-weight: bold} '
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment' , description
|
'--comment' , description
|
||||||
, '--category' , category
|
, '--category' , category
|
||||||
, '--publisher', publisher
|
, '--publisher', publisher
|
||||||
]
|
]
|
||||||
|
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} "'
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} "'
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'class':'ContainerPop'})]
|
keep_only_tags = [dict(name='div', attrs={'class':'ContainerPop'})]
|
||||||
|
|
||||||
remove_tags = [dict(name='link')]
|
remove_tags = [dict(name='link')]
|
||||||
|
|
||||||
feeds = [(u'Articulos', u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=121&Content-Type=text/xml&ChannelDesc=Buenos%20Aires%20Econ%C3%B3mico')]
|
feeds = [(u'Articulos', u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=121&Content-Type=text/xml&ChannelDesc=Buenos%20Aires%20Econ%C3%B3mico')]
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
main, sep, article_part = url.partition('/nota-')
|
main, sep, article_part = url.partition('/nota-')
|
||||||
article_id, rsep, rrest = article_part.partition('-')
|
article_id, rsep, rrest = article_part.partition('-')
|
||||||
return u'http://www.elargentino.com/Impresion.aspx?Id=' + article_id
|
return u'http://www.elargentino.com/Impresion.aspx?Id=' + article_id
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
del item['style']
|
del item['style']
|
||||||
soup.html['lang'] = self.lang
|
soup.html['lang'] = self.lang
|
||||||
soup.html['dir' ] = self.direction
|
soup.html['dir' ] = self.direction
|
||||||
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
||||||
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
|
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
|
||||||
soup.head.insert(0,mlang)
|
soup.head.insert(0,mlang)
|
||||||
soup.head.insert(1,mcharset)
|
soup.head.insert(1,mcharset)
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
def get_cover_url(self):
|
def get_cover_url(self):
|
||||||
cover_url = None
|
cover_url = None
|
||||||
soup = self.index_to_soup(self.INDEX)
|
soup = self.index_to_soup(self.INDEX)
|
||||||
cover_item = soup.find('div',attrs={'class':'colder'})
|
cover_item = soup.find('div',attrs={'class':'colder'})
|
||||||
if cover_item:
|
if cover_item:
|
||||||
clean_url = self.image_url_processor(None,cover_item.div.img['src'])
|
clean_url = self.image_url_processor(None,cover_item.div.img['src'])
|
||||||
cover_url = 'http://www.elargentino.com' + clean_url + '&height=600'
|
cover_url = 'http://www.elargentino.com' + clean_url + '&height=600'
|
||||||
return cover_url
|
return cover_url
|
||||||
|
|
||||||
def image_url_processor(self, baseurl, url):
|
def image_url_processor(self, baseurl, url):
|
||||||
base, sep, rest = url.rpartition('?Id=')
|
base, sep, rest = url.rpartition('?Id=')
|
||||||
img, sep2, rrest = rest.partition('&')
|
img, sep2, rrest = rest.partition('&')
|
||||||
return base + sep + img
|
return base + sep + img
|
||||||
|
@ -1,46 +1,46 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
chicagobreakingnews.com
|
chicagobreakingnews.com
|
||||||
'''
|
'''
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class ChicagoBreakingNews(BasicNewsRecipe):
|
class ChicagoBreakingNews(BasicNewsRecipe):
|
||||||
title = 'Chicago Breaking News'
|
title = 'Chicago Breaking News'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Breaking News from Chicago'
|
description = 'Breaking News from Chicago'
|
||||||
oldest_article = 1
|
oldest_article = 1
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = True
|
use_embedded_content = True
|
||||||
publisher = 'Chicago Breaking News'
|
publisher = 'Chicago Breaking News'
|
||||||
category = 'news, politics, USA, Chicago'
|
category = 'news, politics, USA, Chicago'
|
||||||
encoding = 'utf8'
|
encoding = 'utf8'
|
||||||
language = 'en'
|
language = 'en'
|
||||||
|
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment', description
|
'--comment', description
|
||||||
, '--category', category
|
, '--category', category
|
||||||
, '--publisher', publisher
|
, '--publisher', publisher
|
||||||
]
|
]
|
||||||
|
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||||
|
|
||||||
feeds = [(u'Breaking news', u'http://feeds2.feedburner.com/ChicagoBreakingNews/')]
|
feeds = [(u'Breaking news', u'http://feeds2.feedburner.com/ChicagoBreakingNews/')]
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
links = soup.findAll('a')
|
links = soup.findAll('a')
|
||||||
for item in soup.findAll('a'):
|
for item in soup.findAll('a'):
|
||||||
if item['href'].find('http://feedads.googleadservices.com') > -1:
|
if item['href'].find('http://feedads.googleadservices.com') > -1:
|
||||||
item.extract()
|
item.extract()
|
||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
del item['style']
|
del item['style']
|
||||||
for item in soup.findAll(color=True):
|
for item in soup.findAll(color=True):
|
||||||
del item['color']
|
del item['color']
|
||||||
for item in soup.findAll(size=True):
|
for item in soup.findAll(size=True):
|
||||||
del item['size']
|
del item['size']
|
||||||
return soup
|
return soup
|
||||||
|
@ -3,11 +3,7 @@ __license__ = 'GPL 3'
|
|||||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import re
|
|
||||||
from urlparse import urlparse, urlunparse
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
from calibre.ptempfile import PersistentTemporaryFile
|
|
||||||
from threading import RLock
|
|
||||||
|
|
||||||
class ChicagoTribune(BasicNewsRecipe):
|
class ChicagoTribune(BasicNewsRecipe):
|
||||||
|
|
||||||
|
@ -1,73 +1,73 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
clarin.com
|
clarin.com
|
||||||
'''
|
'''
|
||||||
|
|
||||||
from calibre import strftime
|
from calibre import strftime
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import Tag
|
from calibre.ebooks.BeautifulSoup import Tag
|
||||||
|
|
||||||
class Clarin(BasicNewsRecipe):
|
class Clarin(BasicNewsRecipe):
|
||||||
title = 'Clarin'
|
title = 'Clarin'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Noticias de Argentina y mundo'
|
description = 'Noticias de Argentina y mundo'
|
||||||
publisher = 'Grupo Clarin'
|
publisher = 'Grupo Clarin'
|
||||||
category = 'news, politics, Argentina'
|
category = 'news, politics, Argentina'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
cover_url = strftime('http://www.clarin.com/diario/%Y/%m/%d/portada.jpg')
|
cover_url = strftime('http://www.clarin.com/diario/%Y/%m/%d/portada.jpg')
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
encoding = 'cp1252'
|
encoding = 'cp1252'
|
||||||
language = 'es'
|
language = 'es'
|
||||||
|
|
||||||
lang = 'es-AR'
|
lang = 'es-AR'
|
||||||
direction = 'ltr'
|
direction = 'ltr'
|
||||||
extra_css = ' .Txt{ font-family: sans-serif } .Volan{ font-family: sans-serif; font-size: x-small} .Pie{ font-family: sans-serif; font-size: x-small} .Copete{font-family: sans-serif; font-size: large} .Hora{font-family: sans-serif; font-size: large} .Autor{font-family: sans-serif; font-size: small} '
|
extra_css = ' .Txt{ font-family: sans-serif } .Volan{ font-family: sans-serif; font-size: x-small} .Pie{ font-family: sans-serif; font-size: x-small} .Copete{font-family: sans-serif; font-size: large} .Hora{font-family: sans-serif; font-size: large} .Autor{font-family: sans-serif; font-size: small} '
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment', description
|
'--comment', description
|
||||||
, '--category', category
|
, '--category', category
|
||||||
, '--publisher', publisher
|
, '--publisher', publisher
|
||||||
]
|
]
|
||||||
|
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\npretty_print=True\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} "'
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\npretty_print=True\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} "'
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='a' , attrs={'class':'Imp' })
|
dict(name='a' , attrs={'class':'Imp' })
|
||||||
,dict(name='div' , attrs={'class':'Perma' })
|
,dict(name='div' , attrs={'class':'Perma' })
|
||||||
,dict(name='h1' , text='Imprimir' )
|
,dict(name='h1' , text='Imprimir' )
|
||||||
]
|
]
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Ultimo Momento', u'http://www.clarin.com/diario/hoy/um/sumariorss.xml')
|
(u'Ultimo Momento', u'http://www.clarin.com/diario/hoy/um/sumariorss.xml')
|
||||||
,(u'El Pais' , u'http://www.clarin.com/diario/hoy/elpais.xml' )
|
,(u'El Pais' , u'http://www.clarin.com/diario/hoy/elpais.xml' )
|
||||||
,(u'Opinion' , u'http://www.clarin.com/diario/hoy/opinion.xml' )
|
,(u'Opinion' , u'http://www.clarin.com/diario/hoy/opinion.xml' )
|
||||||
,(u'El Mundo' , u'http://www.clarin.com/diario/hoy/elmundo.xml' )
|
,(u'El Mundo' , u'http://www.clarin.com/diario/hoy/elmundo.xml' )
|
||||||
,(u'Sociedad' , u'http://www.clarin.com/diario/hoy/sociedad.xml' )
|
,(u'Sociedad' , u'http://www.clarin.com/diario/hoy/sociedad.xml' )
|
||||||
,(u'La Ciudad' , u'http://www.clarin.com/diario/hoy/laciudad.xml' )
|
,(u'La Ciudad' , u'http://www.clarin.com/diario/hoy/laciudad.xml' )
|
||||||
,(u'Policiales' , u'http://www.clarin.com/diario/hoy/policiales.xml' )
|
,(u'Policiales' , u'http://www.clarin.com/diario/hoy/policiales.xml' )
|
||||||
,(u'Deportes' , u'http://www.clarin.com/diario/hoy/deportes.xml' )
|
,(u'Deportes' , u'http://www.clarin.com/diario/hoy/deportes.xml' )
|
||||||
]
|
]
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
rest = url.partition('-0')[-1]
|
rest = url.partition('-0')[-1]
|
||||||
lmain = rest.partition('.')[0]
|
lmain = rest.partition('.')[0]
|
||||||
lurl = u'http://www.servicios.clarin.com/notas/jsp/clarin/v9/notas/imprimir.jsp?pagid=' + lmain
|
lurl = u'http://www.servicios.clarin.com/notas/jsp/clarin/v9/notas/imprimir.jsp?pagid=' + lmain
|
||||||
return lurl
|
return lurl
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
soup.html['lang'] = self.lang
|
soup.html['lang'] = self.lang
|
||||||
soup.html['dir' ] = self.direction
|
soup.html['dir' ] = self.direction
|
||||||
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
||||||
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
|
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
|
||||||
soup.head.insert(0,mlang)
|
soup.head.insert(0,mlang)
|
||||||
soup.head.insert(1,mcharset)
|
soup.head.insert(1,mcharset)
|
||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
del item['style']
|
del item['style']
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
|
@ -1,46 +1,46 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
climateprogress.org
|
climateprogress.org
|
||||||
'''
|
'''
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
from calibre.ebooks.BeautifulSoup import Tag
|
||||||
|
|
||||||
class ClimateProgress(BasicNewsRecipe):
|
class ClimateProgress(BasicNewsRecipe):
|
||||||
title = 'Climate Progress'
|
title = 'Climate Progress'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = "An insider's view of climate science, politics and solutions"
|
description = "An insider's view of climate science, politics and solutions"
|
||||||
publisher = 'Climate Progress'
|
publisher = 'Climate Progress'
|
||||||
category = 'news, ecology, climate, blog'
|
category = 'news, ecology, climate, blog'
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = True
|
use_embedded_content = True
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
language = 'en'
|
language = 'en'
|
||||||
|
|
||||||
lang = 'en-US'
|
lang = 'en-US'
|
||||||
direction = 'ltr'
|
direction = 'ltr'
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment', description
|
'--comment', description
|
||||||
, '--category', category
|
, '--category', category
|
||||||
, '--publisher', publisher
|
, '--publisher', publisher
|
||||||
]
|
]
|
||||||
|
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||||
|
|
||||||
feeds = [(u'Posts', u'http://feeds.feedburner.com/climateprogress/lCrX')]
|
feeds = [(u'Posts', u'http://feeds.feedburner.com/climateprogress/lCrX')]
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
soup.html['lang'] = self.lang
|
soup.html['lang'] = self.lang
|
||||||
soup.html['dir' ] = self.direction
|
soup.html['dir' ] = self.direction
|
||||||
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
||||||
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
|
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
|
||||||
soup.head.insert(0,mlang)
|
soup.head.insert(0,mlang)
|
||||||
soup.head.insert(1,mcharset)
|
soup.head.insert(1,mcharset)
|
||||||
return self.adeify_images(soup)
|
return self.adeify_images(soup)
|
||||||
|
|
||||||
|
@ -1,41 +1,41 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
www.codinghorror.com/blog/
|
www.codinghorror.com/blog/
|
||||||
'''
|
'''
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class CodingHorror(BasicNewsRecipe):
|
class CodingHorror(BasicNewsRecipe):
|
||||||
title = 'Coding Horror'
|
title = 'Coding Horror'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'programming and human factors - Jeff Atwood'
|
description = 'programming and human factors - Jeff Atwood'
|
||||||
category = 'blog, programming'
|
category = 'blog, programming'
|
||||||
publisher = 'Jeff Atwood'
|
publisher = 'Jeff Atwood'
|
||||||
language = 'en'
|
language = 'en'
|
||||||
|
|
||||||
author = 'Jeff Atwood'
|
author = 'Jeff Atwood'
|
||||||
oldest_article = 30
|
oldest_article = 30
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = True
|
use_embedded_content = True
|
||||||
encoding = 'cp1252'
|
encoding = 'cp1252'
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment' , description
|
'--comment' , description
|
||||||
, '--category' , category
|
, '--category' , category
|
||||||
, '--publisher', publisher
|
, '--publisher', publisher
|
||||||
, '--author' , author
|
, '--author' , author
|
||||||
]
|
]
|
||||||
|
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nauthors="' + author + '"'
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nauthors="' + author + '"'
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name=['object','link'])
|
dict(name=['object','link'])
|
||||||
,dict(name='div',attrs={'class':'feedflare'})
|
,dict(name='div',attrs={'class':'feedflare'})
|
||||||
]
|
]
|
||||||
|
|
||||||
feeds = [(u'Articles', u'http://feeds2.feedburner.com/codinghorror' )]
|
feeds = [(u'Articles', u'http://feeds2.feedburner.com/codinghorror' )]
|
||||||
|
|
||||||
|
@ -1,46 +1,46 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
www.corriere.it/english
|
www.corriere.it/english
|
||||||
'''
|
'''
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class Corriere_en(BasicNewsRecipe):
|
class Corriere_en(BasicNewsRecipe):
|
||||||
title = 'Corriere della Sera in English'
|
title = 'Corriere della Sera in English'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'News from Milan and Italy'
|
description = 'News from Milan and Italy'
|
||||||
oldest_article = 15
|
oldest_article = 15
|
||||||
publisher = 'Corriere della Sera'
|
publisher = 'Corriere della Sera'
|
||||||
category = 'news, politics, Italy'
|
category = 'news, politics, Italy'
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = 'cp1252'
|
encoding = 'cp1252'
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
language = 'en'
|
language = 'en'
|
||||||
|
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment', description
|
'--comment', description
|
||||||
, '--category', category
|
, '--category', category
|
||||||
, '--publisher', publisher
|
, '--publisher', publisher
|
||||||
, '--ignore-tables'
|
, '--ignore-tables'
|
||||||
]
|
]
|
||||||
|
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'class':['news-dettaglio article','article']})]
|
keep_only_tags = [dict(name='div', attrs={'class':['news-dettaglio article','article']})]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name=['base','object','link','embed','img'])
|
dict(name=['base','object','link','embed','img'])
|
||||||
,dict(name='div', attrs={'class':'news-goback'})
|
,dict(name='div', attrs={'class':'news-goback'})
|
||||||
,dict(name='ul', attrs={'class':'toolbar'})
|
,dict(name='ul', attrs={'class':'toolbar'})
|
||||||
]
|
]
|
||||||
|
|
||||||
remove_tags_after = dict(name='p', attrs={'class':'footnotes'})
|
remove_tags_after = dict(name='p', attrs={'class':'footnotes'})
|
||||||
|
|
||||||
feeds = [(u'Italian Life', u'http://www.corriere.it/rss/english.xml')]
|
feeds = [(u'Italian Life', u'http://www.corriere.it/rss/english.xml')]
|
||||||
|
|
||||||
|
@ -1,56 +1,56 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
www.corriere.it
|
www.corriere.it
|
||||||
'''
|
'''
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
class Corriere_it(BasicNewsRecipe):
|
class Corriere_it(BasicNewsRecipe):
|
||||||
title = 'Corriere della Sera'
|
title = 'Corriere della Sera'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'News from Milan and Italy'
|
description = 'News from Milan and Italy'
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
publisher = 'Corriere della Sera'
|
publisher = 'Corriere della Sera'
|
||||||
category = 'news, politics, Italy'
|
category = 'news, politics, Italy'
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = 'cp1252'
|
encoding = 'cp1252'
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
language = 'it'
|
language = 'it'
|
||||||
|
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment', description
|
'--comment', description
|
||||||
, '--category', category
|
, '--category', category
|
||||||
, '--publisher', publisher
|
, '--publisher', publisher
|
||||||
, '--ignore-tables'
|
, '--ignore-tables'
|
||||||
]
|
]
|
||||||
|
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'class':['news-dettaglio article','article']})]
|
keep_only_tags = [dict(name='div', attrs={'class':['news-dettaglio article','article']})]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name=['base','object','link','embed','img'])
|
dict(name=['base','object','link','embed','img'])
|
||||||
,dict(name='div', attrs={'class':'news-goback'})
|
,dict(name='div', attrs={'class':'news-goback'})
|
||||||
,dict(name='ul', attrs={'class':'toolbar'})
|
,dict(name='ul', attrs={'class':'toolbar'})
|
||||||
]
|
]
|
||||||
|
|
||||||
remove_tags_after = dict(name='p', attrs={'class':'footnotes'})
|
remove_tags_after = dict(name='p', attrs={'class':'footnotes'})
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Ultimora' , u'http://www.corriere.it/rss/ultimora.xml' )
|
(u'Ultimora' , u'http://www.corriere.it/rss/ultimora.xml' )
|
||||||
,(u'Cronache' , u'http://www.corriere.it/rss/cronache.xml' )
|
,(u'Cronache' , u'http://www.corriere.it/rss/cronache.xml' )
|
||||||
,(u'Economia' , u'http://www.corriere.it/rss/economia.xml' )
|
,(u'Economia' , u'http://www.corriere.it/rss/economia.xml' )
|
||||||
,(u'Editoriali', u'http://www.corriere.it/rss/editoriali.xml')
|
,(u'Editoriali', u'http://www.corriere.it/rss/editoriali.xml')
|
||||||
,(u'Esteri' , u'http://www.corriere.it/rss/esteri.xml' )
|
,(u'Esteri' , u'http://www.corriere.it/rss/esteri.xml' )
|
||||||
,(u'Politica' , u'http://www.corriere.it/rss/politica.xml' )
|
,(u'Politica' , u'http://www.corriere.it/rss/politica.xml' )
|
||||||
,(u'Salute' , u'http://www.corriere.it/rss/salute.xml' )
|
,(u'Salute' , u'http://www.corriere.it/rss/salute.xml' )
|
||||||
,(u'Scienze' , u'http://www.corriere.it/rss/scienze.xml' )
|
,(u'Scienze' , u'http://www.corriere.it/rss/scienze.xml' )
|
||||||
,(u'Spettacolo', u'http://www.corriere.it/rss/spettacoli.xml')
|
,(u'Spettacolo', u'http://www.corriere.it/rss/spettacoli.xml')
|
||||||
,(u'Sport' , u'http://www.corriere.it/rss/sport.xml' )
|
,(u'Sport' , u'http://www.corriere.it/rss/sport.xml' )
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -7,7 +7,6 @@ Courrier International
|
|||||||
'''
|
'''
|
||||||
|
|
||||||
import re
|
import re
|
||||||
from datetime import date
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class CourrierInternational(BasicNewsRecipe):
|
class CourrierInternational(BasicNewsRecipe):
|
||||||
@ -21,12 +20,12 @@ class CourrierInternational(BasicNewsRecipe):
|
|||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
|
|
||||||
html2lrf_options = ['--base-font-size', '10']
|
html2lrf_options = ['--base-font-size', '10']
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
# Some articles requiring subscription fails on download.
|
# Some articles requiring subscription fails on download.
|
||||||
('A la Une', 'http://www.courrierinternational.com/rss/rss_a_la_une.xml'),
|
('A la Une', 'http://www.courrierinternational.com/rss/rss_a_la_une.xml'),
|
||||||
]
|
]
|
||||||
|
|
||||||
preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE|re.DOTALL), i[1]) for i in
|
preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE|re.DOTALL), i[1]) for i in
|
||||||
[
|
[
|
||||||
#Handle Depeches
|
#Handle Depeches
|
||||||
@ -35,8 +34,8 @@ class CourrierInternational(BasicNewsRecipe):
|
|||||||
(r'.*<td [^>]*>(Courrier international.*?) <td width="10"><img src="/img/espaceur.gif"></td>.*', lambda match : '<html><body><table><tr><td>'+match.group(1)+'</body></html>'),
|
(r'.*<td [^>]*>(Courrier international.*?) <td width="10"><img src="/img/espaceur.gif"></td>.*', lambda match : '<html><body><table><tr><td>'+match.group(1)+'</body></html>'),
|
||||||
]
|
]
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
return re.sub('/[a-zA-Z]+\.asp','/imprimer.asp' ,url)
|
return re.sub('/[a-zA-Z]+\.asp','/imprimer.asp' ,url)
|
||||||
|
|
||||||
|
@ -1,62 +1,62 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
criticadigital.com
|
criticadigital.com
|
||||||
'''
|
'''
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class CriticaDigital(BasicNewsRecipe):
|
class CriticaDigital(BasicNewsRecipe):
|
||||||
title = 'Critica de la Argentina'
|
title = 'Critica de la Argentina'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Noticias de Argentina'
|
description = 'Noticias de Argentina'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
language = 'es'
|
language = 'es'
|
||||||
|
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = 'cp1252'
|
encoding = 'cp1252'
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment' , description
|
'--comment' , description
|
||||||
, '--category' , 'news, Argentina'
|
, '--category' , 'news, Argentina'
|
||||||
, '--publisher' , title
|
, '--publisher' , title
|
||||||
]
|
]
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name='div', attrs={'class':'bloqueTitulosNoticia'})
|
dict(name='div', attrs={'class':'bloqueTitulosNoticia'})
|
||||||
,dict(name='div', attrs={'id':'c453-1' })
|
,dict(name='div', attrs={'id':'c453-1' })
|
||||||
]
|
]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='div', attrs={'class':'box300' })
|
dict(name='div', attrs={'class':'box300' })
|
||||||
,dict(name='div', style=True )
|
,dict(name='div', style=True )
|
||||||
,dict(name='div', attrs={'class':'titcomentario'})
|
,dict(name='div', attrs={'class':'titcomentario'})
|
||||||
,dict(name='div', attrs={'class':'comentario' })
|
,dict(name='div', attrs={'class':'comentario' })
|
||||||
,dict(name='div', attrs={'class':'paginador' })
|
,dict(name='div', attrs={'class':'paginador' })
|
||||||
]
|
]
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Politica', u'http://www.criticadigital.com/herramientas/rss.php?ch=politica' )
|
(u'Politica', u'http://www.criticadigital.com/herramientas/rss.php?ch=politica' )
|
||||||
,(u'Economia', u'http://www.criticadigital.com/herramientas/rss.php?ch=economia' )
|
,(u'Economia', u'http://www.criticadigital.com/herramientas/rss.php?ch=economia' )
|
||||||
,(u'Deportes', u'http://www.criticadigital.com/herramientas/rss.php?ch=deportes' )
|
,(u'Deportes', u'http://www.criticadigital.com/herramientas/rss.php?ch=deportes' )
|
||||||
,(u'Espectaculos', u'http://www.criticadigital.com/herramientas/rss.php?ch=espectaculos')
|
,(u'Espectaculos', u'http://www.criticadigital.com/herramientas/rss.php?ch=espectaculos')
|
||||||
,(u'Mundo', u'http://www.criticadigital.com/herramientas/rss.php?ch=mundo' )
|
,(u'Mundo', u'http://www.criticadigital.com/herramientas/rss.php?ch=mundo' )
|
||||||
,(u'Policiales', u'http://www.criticadigital.com/herramientas/rss.php?ch=policiales' )
|
,(u'Policiales', u'http://www.criticadigital.com/herramientas/rss.php?ch=policiales' )
|
||||||
,(u'Sociedad', u'http://www.criticadigital.com/herramientas/rss.php?ch=sociedad' )
|
,(u'Sociedad', u'http://www.criticadigital.com/herramientas/rss.php?ch=sociedad' )
|
||||||
,(u'Salud', u'http://www.criticadigital.com/herramientas/rss.php?ch=salud' )
|
,(u'Salud', u'http://www.criticadigital.com/herramientas/rss.php?ch=salud' )
|
||||||
,(u'Tecnologia', u'http://www.criticadigital.com/herramientas/rss.php?ch=tecnologia' )
|
,(u'Tecnologia', u'http://www.criticadigital.com/herramientas/rss.php?ch=tecnologia' )
|
||||||
,(u'Santa Fe', u'http://www.criticadigital.com/herramientas/rss.php?ch=santa_fe' )
|
,(u'Santa Fe', u'http://www.criticadigital.com/herramientas/rss.php?ch=santa_fe' )
|
||||||
]
|
]
|
||||||
|
|
||||||
def get_cover_url(self):
|
def get_cover_url(self):
|
||||||
cover_url = None
|
cover_url = None
|
||||||
index = 'http://www.criticadigital.com/impresa/'
|
index = 'http://www.criticadigital.com/impresa/'
|
||||||
soup = self.index_to_soup(index)
|
soup = self.index_to_soup(index)
|
||||||
link_item = soup.find('div',attrs={'class':'tapa'})
|
link_item = soup.find('div',attrs={'class':'tapa'})
|
||||||
if link_item:
|
if link_item:
|
||||||
cover_url = index + link_item.img['src']
|
cover_url = index + link_item.img['src']
|
||||||
return cover_url
|
return cover_url
|
||||||
|
@ -1,45 +1,44 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
newyorker.com
|
newyorker.com
|
||||||
'''
|
'''
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import Tag
|
|
||||||
|
class CubaDebate(BasicNewsRecipe):
|
||||||
class CubaDebate(BasicNewsRecipe):
|
title = 'CubaDebate'
|
||||||
title = 'CubaDebate'
|
__author__ = 'Darko Miletic'
|
||||||
__author__ = 'Darko Miletic'
|
description = 'Contra el Terorismo Mediatico'
|
||||||
description = 'Contra el Terorismo Mediatico'
|
oldest_article = 15
|
||||||
oldest_article = 15
|
|
||||||
language = 'es'
|
language = 'es'
|
||||||
|
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
publisher = 'Cubadebate'
|
publisher = 'Cubadebate'
|
||||||
category = 'news, politics, Cuba'
|
category = 'news, politics, Cuba'
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
extra_css = ' #BlogTitle{font-size: x-large; font-weight: bold} '
|
extra_css = ' #BlogTitle{font-size: x-large; font-weight: bold} '
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comments' : description
|
'comments' : description
|
||||||
,'tags' : category
|
,'tags' : category
|
||||||
,'language' : 'es'
|
,'language' : 'es'
|
||||||
,'publisher' : publisher
|
,'publisher' : publisher
|
||||||
,'pretty_print': True
|
,'pretty_print': True
|
||||||
}
|
}
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'id':'Outline'})]
|
keep_only_tags = [dict(name='div', attrs={'id':'Outline'})]
|
||||||
remove_tags_after = dict(name='div',attrs={'id':'BlogContent'})
|
remove_tags_after = dict(name='div',attrs={'id':'BlogContent'})
|
||||||
remove_tags = [dict(name='link')]
|
remove_tags = [dict(name='link')]
|
||||||
|
|
||||||
feeds = [(u'Articulos', u'http://www.cubadebate.cu/feed/')]
|
feeds = [(u'Articulos', u'http://www.cubadebate.cu/feed/')]
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
return url + 'print/'
|
return url + 'print/'
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
return self.adeify_images(soup)
|
return self.adeify_images(soup)
|
||||||
|
@ -1,34 +1,34 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class TheDailyMail(BasicNewsRecipe):
|
class TheDailyMail(BasicNewsRecipe):
|
||||||
title = u'The Daily Mail'
|
title = u'The Daily Mail'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
language = 'en'
|
language = 'en'
|
||||||
|
|
||||||
author = 'RufusA'
|
author = 'RufusA'
|
||||||
simultaneous_downloads= 1
|
simultaneous_downloads= 1
|
||||||
max_articles_per_feed = 50
|
max_articles_per_feed = 50
|
||||||
|
|
||||||
extra_css = 'h1 {text-align: left;}'
|
extra_css = 'h1 {text-align: left;}'
|
||||||
|
|
||||||
remove_tags = [ dict(name='ul', attrs={'class':'article-icons-links'}) ]
|
remove_tags = [ dict(name='ul', attrs={'class':'article-icons-links'}) ]
|
||||||
remove_tags_after = dict(name='h3', attrs={'class':'social-links-title'})
|
remove_tags_after = dict(name='h3', attrs={'class':'social-links-title'})
|
||||||
remove_tags_before = dict(name='div', attrs={'id':'content'})
|
remove_tags_before = dict(name='div', attrs={'id':'content'})
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Home', u'http://www.dailymail.co.uk/home/index.rss'),
|
(u'Home', u'http://www.dailymail.co.uk/home/index.rss'),
|
||||||
(u'News', u'http://www.dailymail.co.uk/news/index.rss'),
|
(u'News', u'http://www.dailymail.co.uk/news/index.rss'),
|
||||||
(u'Sport', u'http://www.dailymail.co.uk/sport/index.rss'),
|
(u'Sport', u'http://www.dailymail.co.uk/sport/index.rss'),
|
||||||
(u'TV and Showbiz', u'http://www.dailymail.co.uk/tvshowbiz/index.rss'),
|
(u'TV and Showbiz', u'http://www.dailymail.co.uk/tvshowbiz/index.rss'),
|
||||||
(u'Femail', u'http://www.dailymail.co.uk/femail/index.rss'),
|
(u'Femail', u'http://www.dailymail.co.uk/femail/index.rss'),
|
||||||
(u'Health', u'http://www.dailymail.co.uk/health/index.rss'),
|
(u'Health', u'http://www.dailymail.co.uk/health/index.rss'),
|
||||||
(u'Science and Technology', u'http://www.dailymail.co.uk/sciencetech/index.rss'),
|
(u'Science and Technology', u'http://www.dailymail.co.uk/sciencetech/index.rss'),
|
||||||
(u'Money', u'http://www.dailymail.co.uk/money/index.rss'),
|
(u'Money', u'http://www.dailymail.co.uk/money/index.rss'),
|
||||||
(u'Property', u'http://www.dailymail.co.uk/property/index.rss'),
|
(u'Property', u'http://www.dailymail.co.uk/property/index.rss'),
|
||||||
(u'Motoring', u'http://www.dailymail.co.uk/motoring/index.rss'),
|
(u'Motoring', u'http://www.dailymail.co.uk/motoring/index.rss'),
|
||||||
(u'Travel', u'http://www.dailymail.co.uk/travel/index.rss')]
|
(u'Travel', u'http://www.dailymail.co.uk/travel/index.rss')]
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
main = url.partition('?')[0]
|
main = url.partition('?')[0]
|
||||||
return main + '?printingPage=true'
|
return main + '?printingPage=true'
|
||||||
|
@ -1,62 +1,62 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
danas.rs
|
danas.rs
|
||||||
'''
|
'''
|
||||||
import re
|
import re
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
from calibre.ebooks.BeautifulSoup import Tag
|
||||||
|
|
||||||
class Danas(BasicNewsRecipe):
|
class Danas(BasicNewsRecipe):
|
||||||
title = 'Danas'
|
title = 'Danas'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Vesti'
|
description = 'Vesti'
|
||||||
publisher = 'Danas d.o.o.'
|
publisher = 'Danas d.o.o.'
|
||||||
category = 'news, politics, Serbia'
|
category = 'news, politics, Serbia'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = False
|
no_stylesheets = False
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
language = 'sr'
|
language = 'sr'
|
||||||
|
|
||||||
lang = 'sr-Latn-RS'
|
lang = 'sr-Latn-RS'
|
||||||
direction = 'ltr'
|
direction = 'ltr'
|
||||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
|
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comment' : description
|
'comment' : description
|
||||||
, 'tags' : category
|
, 'tags' : category
|
||||||
, 'publisher' : publisher
|
, 'publisher' : publisher
|
||||||
, 'language' : lang
|
, 'language' : lang
|
||||||
, 'pretty_print' : True
|
, 'pretty_print' : True
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'id':'left'})]
|
keep_only_tags = [dict(name='div', attrs={'id':'left'})]
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='div', attrs={'class':['width_1_4','metaClanka','baner']})
|
dict(name='div', attrs={'class':['width_1_4','metaClanka','baner']})
|
||||||
,dict(name='div', attrs={'id':'comments'})
|
,dict(name='div', attrs={'id':'comments'})
|
||||||
,dict(name=['object','link'])
|
,dict(name=['object','link'])
|
||||||
]
|
]
|
||||||
|
|
||||||
feeds = [ (u'Vesti', u'http://www.danas.rs/rss/rss.asp')]
|
feeds = [ (u'Vesti', u'http://www.danas.rs/rss/rss.asp')]
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
||||||
soup.head.insert(0,mlang)
|
soup.head.insert(0,mlang)
|
||||||
attribs = [ 'style','font','valign'
|
attribs = [ 'style','font','valign'
|
||||||
,'colspan','width','height'
|
,'colspan','width','height'
|
||||||
,'rowspan','summary','align'
|
,'rowspan','summary','align'
|
||||||
,'cellspacing','cellpadding'
|
,'cellspacing','cellpadding'
|
||||||
,'frames','rules','border'
|
,'frames','rules','border'
|
||||||
]
|
]
|
||||||
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
|
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
|
||||||
item.name = 'div'
|
item.name = 'div'
|
||||||
for attrib in attribs:
|
for attrib in attribs:
|
||||||
if item.has_key(attrib):
|
if item.has_key(attrib):
|
||||||
del item[attrib]
|
del item[attrib]
|
||||||
return soup
|
return soup
|
||||||
|
@ -1,76 +1,76 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
www.nieuwsblad.be
|
www.nieuwsblad.be
|
||||||
'''
|
'''
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import Tag
|
from calibre.ebooks.BeautifulSoup import Tag
|
||||||
|
|
||||||
class DeGentenaarOnline(BasicNewsRecipe):
|
class DeGentenaarOnline(BasicNewsRecipe):
|
||||||
title = 'De Gentenaar Online'
|
title = 'De Gentenaar Online'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'News from Belgium in Dutch'
|
description = 'News from Belgium in Dutch'
|
||||||
publisher = 'De Gentenaar'
|
publisher = 'De Gentenaar'
|
||||||
category = 'news, politics, Belgium'
|
category = 'news, politics, Belgium'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
language = 'nl'
|
language = 'nl'
|
||||||
|
|
||||||
lang = 'nl-BE'
|
lang = 'nl-BE'
|
||||||
direction = 'ltr'
|
direction = 'ltr'
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment' , description
|
'--comment' , description
|
||||||
, '--category' , category
|
, '--category' , category
|
||||||
, '--publisher', publisher
|
, '--publisher', publisher
|
||||||
]
|
]
|
||||||
|
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} "'
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} "'
|
||||||
|
|
||||||
keep_only_tags = [dict(name='span', attrs={'id':['lblArticleTitle','lblArticleIntroduction','lblArticleMainText']})]
|
keep_only_tags = [dict(name='span', attrs={'id':['lblArticleTitle','lblArticleIntroduction','lblArticleMainText']})]
|
||||||
remove_tags = [dict(name=['embed','object'])]
|
remove_tags = [dict(name=['embed','object'])]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Snelnieuws' , u'http://feeds.nieuwsblad.be/nieuws/snelnieuws' )
|
(u'Snelnieuws' , u'http://feeds.nieuwsblad.be/nieuws/snelnieuws' )
|
||||||
,(u'Binnenland' , u'http://feeds.nieuwsblad.be/nieuws/binnenland' )
|
,(u'Binnenland' , u'http://feeds.nieuwsblad.be/nieuws/binnenland' )
|
||||||
,(u'Buitenland' , u'http://feeds.nieuwsblad.be/nieuwsblad/buitenland' )
|
,(u'Buitenland' , u'http://feeds.nieuwsblad.be/nieuwsblad/buitenland' )
|
||||||
,(u'Economie' , u'http://feeds.nieuwsblad.be/economie/home' )
|
,(u'Economie' , u'http://feeds.nieuwsblad.be/economie/home' )
|
||||||
,(u'Economie' , u'http://feeds.nieuwsblad.be/economie/home' )
|
,(u'Economie' , u'http://feeds.nieuwsblad.be/economie/home' )
|
||||||
,(u'Algemeen' , u'http://feeds.nieuwsblad.be/life/algemeen' )
|
,(u'Algemeen' , u'http://feeds.nieuwsblad.be/life/algemeen' )
|
||||||
,(u'Film' , u'http://feeds.nieuwsblad.be/life/film' )
|
,(u'Film' , u'http://feeds.nieuwsblad.be/life/film' )
|
||||||
,(u'Boek' , u'http://feeds.nieuwsblad.be/life/boeken' )
|
,(u'Boek' , u'http://feeds.nieuwsblad.be/life/boeken' )
|
||||||
,(u'Muziek' , u'http://feeds.nieuwsblad.be/life/muziek' )
|
,(u'Muziek' , u'http://feeds.nieuwsblad.be/life/muziek' )
|
||||||
,(u'Podium' , u'http://feeds.nieuwsblad.be/life/podium' )
|
,(u'Podium' , u'http://feeds.nieuwsblad.be/life/podium' )
|
||||||
,(u'TV & radio' , u'http://feeds.nieuwsblad.be/life/tv' )
|
,(u'TV & radio' , u'http://feeds.nieuwsblad.be/life/tv' )
|
||||||
]
|
]
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
return url.replace('/Detail.aspx?articleid','/PrintArticle.aspx?ArticleID')
|
return url.replace('/Detail.aspx?articleid','/PrintArticle.aspx?ArticleID')
|
||||||
|
|
||||||
def get_article_url(self, article):
|
def get_article_url(self, article):
|
||||||
return article.get('guid', None)
|
return article.get('guid', None)
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
del soup.body['onload']
|
del soup.body['onload']
|
||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
del item['style']
|
del item['style']
|
||||||
for item in soup.findAll('span'):
|
for item in soup.findAll('span'):
|
||||||
item.name='div'
|
item.name='div'
|
||||||
if item.has_key('id') and item['id'] == 'lblArticleTitle':
|
if item.has_key('id') and item['id'] == 'lblArticleTitle':
|
||||||
item.name='h3'
|
item.name='h3'
|
||||||
|
|
||||||
soup.html['lang'] = self.lang
|
soup.html['lang'] = self.lang
|
||||||
soup.html['dir' ] = self.direction
|
soup.html['dir' ] = self.direction
|
||||||
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
||||||
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
|
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
|
||||||
soup.head.insert(0,mlang)
|
soup.head.insert(0,mlang)
|
||||||
soup.head.insert(1,mcharset)
|
soup.head.insert(1,mcharset)
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
|
@ -1,69 +1,69 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, Gerhard Aigner <gerhard.aigner at gmail.com>'
|
__copyright__ = '2009, Gerhard Aigner <gerhard.aigner at gmail.com>'
|
||||||
|
|
||||||
''' http://www.derstandard.at - Austrian Newspaper '''
|
''' http://www.derstandard.at - Austrian Newspaper '''
|
||||||
import re
|
import re
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class DerStandardRecipe(BasicNewsRecipe):
|
class DerStandardRecipe(BasicNewsRecipe):
|
||||||
title = u'derStandard'
|
title = u'derStandard'
|
||||||
__author__ = 'Gerhard Aigner'
|
__author__ = 'Gerhard Aigner'
|
||||||
description = u'Nachrichten aus Österreich'
|
description = u'Nachrichten aus Österreich'
|
||||||
publisher ='derStandard.at'
|
publisher ='derStandard.at'
|
||||||
category = 'news, politics, nachrichten, Austria'
|
category = 'news, politics, nachrichten, Austria'
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
lang = 'de-AT'
|
lang = 'de-AT'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
language = 'de'
|
language = 'de'
|
||||||
|
|
||||||
recursions = 0
|
recursions = 0
|
||||||
oldest_article = 1
|
oldest_article = 1
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment' , description
|
'--comment' , description
|
||||||
, '--category' , category
|
, '--category' , category
|
||||||
, '--publisher', publisher
|
, '--publisher', publisher
|
||||||
]
|
]
|
||||||
|
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||||
|
|
||||||
feeds = [(u'International', u'http://derstandard.at/?page=rss&ressort=internationalpolitik'),
|
feeds = [(u'International', u'http://derstandard.at/?page=rss&ressort=internationalpolitik'),
|
||||||
(u'Inland', u'http://derstandard.at/?page=rss&ressort=innenpolitik'),
|
(u'Inland', u'http://derstandard.at/?page=rss&ressort=innenpolitik'),
|
||||||
(u'Wirtschaft', u'http://derstandard.at/?page=rss&ressort=investor'),
|
(u'Wirtschaft', u'http://derstandard.at/?page=rss&ressort=investor'),
|
||||||
(u'Web', u'http://derstandard.at/?page=rss&ressort=webstandard'),
|
(u'Web', u'http://derstandard.at/?page=rss&ressort=webstandard'),
|
||||||
(u'Sport', u'http://derstandard.at/?page=rss&ressort=sport'),
|
(u'Sport', u'http://derstandard.at/?page=rss&ressort=sport'),
|
||||||
(u'Panorama', u'http://derstandard.at/?page=rss&ressort=panorama'),
|
(u'Panorama', u'http://derstandard.at/?page=rss&ressort=panorama'),
|
||||||
(u'Etat', u'http://derstandard.at/?page=rss&ressort=etat'),
|
(u'Etat', u'http://derstandard.at/?page=rss&ressort=etat'),
|
||||||
(u'Kultur', u'http://derstandard.at/?page=rss&ressort=kultur'),
|
(u'Kultur', u'http://derstandard.at/?page=rss&ressort=kultur'),
|
||||||
(u'Wissenschaft', u'http://derstandard.at/?page=rss&ressort=wissenschaft'),
|
(u'Wissenschaft', u'http://derstandard.at/?page=rss&ressort=wissenschaft'),
|
||||||
(u'Gesundheit', u'http://derstandard.at/?page=rss&ressort=gesundheit'),
|
(u'Gesundheit', u'http://derstandard.at/?page=rss&ressort=gesundheit'),
|
||||||
(u'Bildung', u'http://derstandard.at/?page=rss&ressort=subildung')]
|
(u'Bildung', u'http://derstandard.at/?page=rss&ressort=subildung')]
|
||||||
remove_tags = [dict(name='div'), dict(name='a'), dict(name='link'), dict(name='meta'),
|
remove_tags = [dict(name='div'), dict(name='a'), dict(name='link'), dict(name='meta'),
|
||||||
dict(name='form',attrs={'name':'sitesearch'}), dict(name='hr')]
|
dict(name='form',attrs={'name':'sitesearch'}), dict(name='hr')]
|
||||||
preprocess_regexps = [
|
preprocess_regexps = [
|
||||||
(re.compile(r'\[[\d]*\]', re.DOTALL|re.IGNORECASE), lambda match: ''),
|
(re.compile(r'\[[\d]*\]', re.DOTALL|re.IGNORECASE), lambda match: ''),
|
||||||
(re.compile(r'bgcolor="#\w{3,6}"', re.DOTALL|re.IGNORECASE), lambda match: '')
|
(re.compile(r'bgcolor="#\w{3,6}"', re.DOTALL|re.IGNORECASE), lambda match: '')
|
||||||
]
|
]
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
return url.replace('?id=', 'txt/?id=')
|
return url.replace('?id=', 'txt/?id=')
|
||||||
|
|
||||||
def get_article_url(self, article):
|
def get_article_url(self, article):
|
||||||
'''if the article links to a index page (ressort) or a picture gallery
|
'''if the article links to a index page (ressort) or a picture gallery
|
||||||
(ansichtssache), don't add it'''
|
(ansichtssache), don't add it'''
|
||||||
if (article.link.count('ressort') > 0 or article.title.lower().count('ansichtssache') > 0):
|
if (article.link.count('ressort') > 0 or article.title.lower().count('ansichtssache') > 0):
|
||||||
return None
|
return None
|
||||||
return article.link
|
return article.link
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
soup.html['xml:lang'] = self.lang
|
soup.html['xml:lang'] = self.lang
|
||||||
soup.html['lang'] = self.lang
|
soup.html['lang'] = self.lang
|
||||||
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=' + self.encoding + '">'
|
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=' + self.encoding + '">'
|
||||||
soup.head.insert(0,mtag)
|
soup.head.insert(0,mtag)
|
||||||
return soup
|
return soup
|
@ -1,72 +1,72 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
elargentino.com
|
elargentino.com
|
||||||
'''
|
'''
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import Tag
|
from calibre.ebooks.BeautifulSoup import Tag
|
||||||
|
|
||||||
class Diagonales(BasicNewsRecipe):
|
class Diagonales(BasicNewsRecipe):
|
||||||
title = 'Diagonales'
|
title = 'Diagonales'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'El nuevo diario de La Plata'
|
description = 'El nuevo diario de La Plata'
|
||||||
publisher = 'ElArgentino.com'
|
publisher = 'ElArgentino.com'
|
||||||
category = 'news, politics, Argentina, La Plata'
|
category = 'news, politics, Argentina, La Plata'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
language = 'es'
|
language = 'es'
|
||||||
|
|
||||||
lang = 'es-AR'
|
lang = 'es-AR'
|
||||||
direction = 'ltr'
|
direction = 'ltr'
|
||||||
INDEX = 'http://www.elargentino.com/medios/122/Diagonales.html'
|
INDEX = 'http://www.elargentino.com/medios/122/Diagonales.html'
|
||||||
extra_css = ' .titulo{font-size: x-large; font-weight: bold} .volantaImp{font-size: small; font-weight: bold} '
|
extra_css = ' .titulo{font-size: x-large; font-weight: bold} .volantaImp{font-size: small; font-weight: bold} '
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment' , description
|
'--comment' , description
|
||||||
, '--category' , category
|
, '--category' , category
|
||||||
, '--publisher', publisher
|
, '--publisher', publisher
|
||||||
]
|
]
|
||||||
|
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} "'
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} "'
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'class':'ContainerPop'})]
|
keep_only_tags = [dict(name='div', attrs={'class':'ContainerPop'})]
|
||||||
|
|
||||||
remove_tags = [dict(name='link')]
|
remove_tags = [dict(name='link')]
|
||||||
|
|
||||||
feeds = [(u'Articulos', u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=122&Content-Type=text/xml&ChannelDesc=Diagonales')]
|
feeds = [(u'Articulos', u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=122&Content-Type=text/xml&ChannelDesc=Diagonales')]
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
main, sep, article_part = url.partition('/nota-')
|
main, sep, article_part = url.partition('/nota-')
|
||||||
article_id, rsep, rrest = article_part.partition('-')
|
article_id, rsep, rrest = article_part.partition('-')
|
||||||
return u'http://www.elargentino.com/Impresion.aspx?Id=' + article_id
|
return u'http://www.elargentino.com/Impresion.aspx?Id=' + article_id
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
del item['style']
|
del item['style']
|
||||||
soup.html['lang'] = self.lang
|
soup.html['lang'] = self.lang
|
||||||
soup.html['dir' ] = self.direction
|
soup.html['dir' ] = self.direction
|
||||||
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
||||||
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
|
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
|
||||||
soup.head.insert(0,mlang)
|
soup.head.insert(0,mlang)
|
||||||
soup.head.insert(1,mcharset)
|
soup.head.insert(1,mcharset)
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
def get_cover_url(self):
|
def get_cover_url(self):
|
||||||
cover_url = None
|
cover_url = None
|
||||||
soup = self.index_to_soup(self.INDEX)
|
soup = self.index_to_soup(self.INDEX)
|
||||||
cover_item = soup.find('div',attrs={'class':'colder'})
|
cover_item = soup.find('div',attrs={'class':'colder'})
|
||||||
if cover_item:
|
if cover_item:
|
||||||
clean_url = self.image_url_processor(None,cover_item.div.img['src'])
|
clean_url = self.image_url_processor(None,cover_item.div.img['src'])
|
||||||
cover_url = 'http://www.elargentino.com' + clean_url + '&height=600'
|
cover_url = 'http://www.elargentino.com' + clean_url + '&height=600'
|
||||||
return cover_url
|
return cover_url
|
||||||
|
|
||||||
def image_url_processor(self, baseurl, url):
|
def image_url_processor(self, baseurl, url):
|
||||||
base, sep, rest = url.rpartition('?Id=')
|
base, sep, rest = url.rpartition('?Id=')
|
||||||
img, sep2, rrest = rest.partition('&')
|
img, sep2, rrest = rest.partition('&')
|
||||||
return base + sep + img
|
return base + sep + img
|
||||||
|
@ -1,73 +1,73 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, Gerhard Aigner <gerhard.aigner at gmail.com>'
|
__copyright__ = '2009, Gerhard Aigner <gerhard.aigner at gmail.com>'
|
||||||
|
|
||||||
''' http://www.diepresse.at - Austrian Newspaper '''
|
''' http://www.diepresse.at - Austrian Newspaper '''
|
||||||
|
|
||||||
import re
|
import re
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class DiePresseRecipe(BasicNewsRecipe):
|
class DiePresseRecipe(BasicNewsRecipe):
|
||||||
title = u'diePresse'
|
title = u'diePresse'
|
||||||
__author__ = 'Gerhard Aigner'
|
__author__ = 'Gerhard Aigner'
|
||||||
description = u'DiePresse.com - Die Online-Ausgabe der Österreichischen Tageszeitung Die Presse.'
|
description = u'DiePresse.com - Die Online-Ausgabe der Österreichischen Tageszeitung Die Presse.'
|
||||||
publisher ='DiePresse.com'
|
publisher ='DiePresse.com'
|
||||||
category = 'news, politics, nachrichten, Austria'
|
category = 'news, politics, nachrichten, Austria'
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
lang = 'de-AT'
|
lang = 'de-AT'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
encoding = 'ISO-8859-1'
|
encoding = 'ISO-8859-1'
|
||||||
language = 'de'
|
language = 'de'
|
||||||
|
|
||||||
recursions = 0
|
recursions = 0
|
||||||
oldest_article = 1
|
oldest_article = 1
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment' , description
|
'--comment' , description
|
||||||
, '--category' , category
|
, '--category' , category
|
||||||
, '--publisher', publisher
|
, '--publisher', publisher
|
||||||
]
|
]
|
||||||
|
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||||
|
|
||||||
preprocess_regexps = [
|
preprocess_regexps = [
|
||||||
(re.compile(r'Textversion', re.DOTALL), lambda match: ''),
|
(re.compile(r'Textversion', re.DOTALL), lambda match: ''),
|
||||||
]
|
]
|
||||||
|
|
||||||
remove_tags = [dict(name='hr'),
|
remove_tags = [dict(name='hr'),
|
||||||
dict(name='br'),
|
dict(name='br'),
|
||||||
dict(name='small'),
|
dict(name='small'),
|
||||||
dict(name='img'),
|
dict(name='img'),
|
||||||
dict(name='div', attrs={'class':'textnavi'}),
|
dict(name='div', attrs={'class':'textnavi'}),
|
||||||
dict(name='h1', attrs={'class':'titel'}),
|
dict(name='h1', attrs={'class':'titel'}),
|
||||||
dict(name='a', attrs={'class':'print'}),
|
dict(name='a', attrs={'class':'print'}),
|
||||||
dict(name='div', attrs={'class':'hline'})]
|
dict(name='div', attrs={'class':'hline'})]
|
||||||
|
|
||||||
feeds = [(u'Politik', u'http://diepresse.com/rss/Politik'),
|
feeds = [(u'Politik', u'http://diepresse.com/rss/Politik'),
|
||||||
(u'Wirtschaft', u'http://diepresse.com/rss/Wirtschaft'),
|
(u'Wirtschaft', u'http://diepresse.com/rss/Wirtschaft'),
|
||||||
(u'Europa', u'http://diepresse.com/rss/EU'),
|
(u'Europa', u'http://diepresse.com/rss/EU'),
|
||||||
(u'Panorama', u'http://diepresse.com/rss/Panorama'),
|
(u'Panorama', u'http://diepresse.com/rss/Panorama'),
|
||||||
(u'Sport', u'http://diepresse.com/rss/Sport'),
|
(u'Sport', u'http://diepresse.com/rss/Sport'),
|
||||||
(u'Kultur', u'http://diepresse.com/rss/Kultur'),
|
(u'Kultur', u'http://diepresse.com/rss/Kultur'),
|
||||||
(u'Leben', u'http://diepresse.com/rss/Leben'),
|
(u'Leben', u'http://diepresse.com/rss/Leben'),
|
||||||
(u'Tech', u'http://diepresse.com/rss/Tech'),
|
(u'Tech', u'http://diepresse.com/rss/Tech'),
|
||||||
(u'Wissenschaft', u'http://diepresse.com/rss/Science'),
|
(u'Wissenschaft', u'http://diepresse.com/rss/Science'),
|
||||||
(u'Bildung', u'http://diepresse.com/rss/Bildung'),
|
(u'Bildung', u'http://diepresse.com/rss/Bildung'),
|
||||||
(u'Gesundheit', u'http://diepresse.com/rss/Gesundheit'),
|
(u'Gesundheit', u'http://diepresse.com/rss/Gesundheit'),
|
||||||
(u'Recht', u'http://diepresse.com/rss/Recht'),
|
(u'Recht', u'http://diepresse.com/rss/Recht'),
|
||||||
(u'Spectrum', u'http://diepresse.com/rss/Spectrum'),
|
(u'Spectrum', u'http://diepresse.com/rss/Spectrum'),
|
||||||
(u'Meinung', u'http://diepresse.com/rss/Meinung')]
|
(u'Meinung', u'http://diepresse.com/rss/Meinung')]
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
return url.replace('home','text/home')
|
return url.replace('home','text/home')
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
soup.html['xml:lang'] = self.lang
|
soup.html['xml:lang'] = self.lang
|
||||||
soup.html['lang'] = self.lang
|
soup.html['lang'] = self.lang
|
||||||
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
|
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
|
||||||
soup.head.insert(0,mtag)
|
soup.head.insert(0,mtag)
|
||||||
return soup
|
return soup
|
@ -1,69 +1,69 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
|
||||||
'''
|
'''
|
||||||
dnevniavaz.ba
|
dnevniavaz.ba
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import re
|
import re
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
from calibre.ebooks.BeautifulSoup import Tag
|
||||||
|
|
||||||
class DnevniAvaz(BasicNewsRecipe):
|
class DnevniAvaz(BasicNewsRecipe):
|
||||||
title = 'Dnevni Avaz'
|
title = 'Dnevni Avaz'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Latest news from Bosnia'
|
description = 'Latest news from Bosnia'
|
||||||
publisher = 'Dnevni Avaz'
|
publisher = 'Dnevni Avaz'
|
||||||
category = 'news, politics, Bosnia and Herzegovina'
|
category = 'news, politics, Bosnia and Herzegovina'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
cover_url = 'http://www.dnevniavaz.ba/img/logo.gif'
|
cover_url = 'http://www.dnevniavaz.ba/img/logo.gif'
|
||||||
lang = 'bs-BA'
|
lang = 'bs-BA'
|
||||||
language = 'bs'
|
language = 'bs'
|
||||||
|
|
||||||
direction = 'ltr'
|
direction = 'ltr'
|
||||||
|
|
||||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
|
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comment' : description
|
'comment' : description
|
||||||
, 'tags' : category
|
, 'tags' : category
|
||||||
, 'publisher' : publisher
|
, 'publisher' : publisher
|
||||||
, 'language' : lang
|
, 'language' : lang
|
||||||
, 'pretty_print' : True
|
, 'pretty_print' : True
|
||||||
}
|
}
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'id':['fullarticle-title','fullarticle-leading','fullarticle-date','fullarticle-text','articleauthor']})]
|
keep_only_tags = [dict(name='div', attrs={'id':['fullarticle-title','fullarticle-leading','fullarticle-date','fullarticle-text','articleauthor']})]
|
||||||
|
|
||||||
remove_tags = [dict(name=['object','link','base'])]
|
remove_tags = [dict(name=['object','link','base'])]
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Najnovije' , u'http://www.dnevniavaz.ba/rss/novo' )
|
(u'Najnovije' , u'http://www.dnevniavaz.ba/rss/novo' )
|
||||||
,(u'Najpopularnije', u'http://www.dnevniavaz.ba/rss/popularno')
|
,(u'Najpopularnije', u'http://www.dnevniavaz.ba/rss/popularno')
|
||||||
]
|
]
|
||||||
|
|
||||||
def replace_tagname(self,soup,tagname,tagid,newtagname):
|
def replace_tagname(self,soup,tagname,tagid,newtagname):
|
||||||
headtag = soup.find(tagname,attrs={'id':tagid})
|
headtag = soup.find(tagname,attrs={'id':tagid})
|
||||||
if headtag:
|
if headtag:
|
||||||
headtag.name = newtagname
|
headtag.name = newtagname
|
||||||
return
|
return
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
soup.html['xml:lang'] = self.lang
|
soup.html['xml:lang'] = self.lang
|
||||||
soup.html['lang'] = self.lang
|
soup.html['lang'] = self.lang
|
||||||
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
||||||
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
|
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
|
||||||
soup.head.insert(0,mlang)
|
soup.head.insert(0,mlang)
|
||||||
soup.head.insert(1,mcharset)
|
soup.head.insert(1,mcharset)
|
||||||
self.replace_tagname(soup,'div','fullarticle-title' ,'h1')
|
self.replace_tagname(soup,'div','fullarticle-title' ,'h1')
|
||||||
self.replace_tagname(soup,'div','fullarticle-leading','h3')
|
self.replace_tagname(soup,'div','fullarticle-leading','h3')
|
||||||
self.replace_tagname(soup,'div','fullarticle-date' ,'h5')
|
self.replace_tagname(soup,'div','fullarticle-date' ,'h5')
|
||||||
return self.adeify_images(soup)
|
return self.adeify_images(soup)
|
||||||
|
@ -1,75 +1,75 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
|
||||||
'''
|
'''
|
||||||
dnevnik.hr
|
dnevnik.hr
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import re
|
import re
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
from calibre.ebooks.BeautifulSoup import Tag
|
||||||
|
|
||||||
class DnevnikCro(BasicNewsRecipe):
|
class DnevnikCro(BasicNewsRecipe):
|
||||||
title = 'Dnevnik - Hr'
|
title = 'Dnevnik - Hr'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = "Vijesti iz Hrvatske"
|
description = "Vijesti iz Hrvatske"
|
||||||
publisher = 'Dnevnik.hr'
|
publisher = 'Dnevnik.hr'
|
||||||
category = 'news, politics, Croatia'
|
category = 'news, politics, Croatia'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
delay = 4
|
delay = 4
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
language = 'hr'
|
language = 'hr'
|
||||||
|
|
||||||
lang = 'hr-HR'
|
lang = 'hr-HR'
|
||||||
direction = 'ltr'
|
direction = 'ltr'
|
||||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
|
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comment' : description
|
'comment' : description
|
||||||
, 'tags' : category
|
, 'tags' : category
|
||||||
, 'publisher' : publisher
|
, 'publisher' : publisher
|
||||||
, 'language' : lang
|
, 'language' : lang
|
||||||
, 'pretty_print' : True
|
, 'pretty_print' : True
|
||||||
}
|
}
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'id':'article'})]
|
keep_only_tags = [dict(name='div', attrs={'id':'article'})]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name=['object','link','embed'])
|
dict(name=['object','link','embed'])
|
||||||
,dict(name='div', attrs={'class':'menu'})
|
,dict(name='div', attrs={'class':'menu'})
|
||||||
,dict(name='div', attrs={'id':'video'})
|
,dict(name='div', attrs={'id':'video'})
|
||||||
]
|
]
|
||||||
|
|
||||||
remove_tags_after = dict(name='div', attrs={'id':'content'})
|
remove_tags_after = dict(name='div', attrs={'id':'content'})
|
||||||
|
|
||||||
feeds = [(u'Vijesti', u'http://rss.dnevnik.hr/index.rss')]
|
feeds = [(u'Vijesti', u'http://rss.dnevnik.hr/index.rss')]
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
soup.html['lang'] = self.lang
|
soup.html['lang'] = self.lang
|
||||||
soup.html['dir' ] = self.direction
|
soup.html['dir' ] = self.direction
|
||||||
|
|
||||||
attribs = [ 'style','font','valign'
|
attribs = [ 'style','font','valign'
|
||||||
,'colspan','width','height'
|
,'colspan','width','height'
|
||||||
,'rowspan','summary','align'
|
,'rowspan','summary','align'
|
||||||
,'cellspacing','cellpadding'
|
,'cellspacing','cellpadding'
|
||||||
,'frames','rules','border'
|
,'frames','rules','border'
|
||||||
]
|
]
|
||||||
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
|
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
|
||||||
item.name = 'div'
|
item.name = 'div'
|
||||||
for attrib in attribs:
|
for attrib in attribs:
|
||||||
if item.has_key(attrib):
|
if item.has_key(attrib):
|
||||||
del item[attrib]
|
del item[attrib]
|
||||||
|
|
||||||
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
||||||
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
|
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
|
||||||
soup.head.insert(0,mlang)
|
soup.head.insert(0,mlang)
|
||||||
soup.head.insert(1,mcharset)
|
soup.head.insert(1,mcharset)
|
||||||
return self.adeify_images(soup)
|
return self.adeify_images(soup)
|
||||||
|
|
||||||
|
@ -1,59 +1,59 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
|
||||||
'''
|
'''
|
||||||
e-novine.com
|
e-novine.com
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import re
|
import re
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
from calibre.ebooks.BeautifulSoup import Tag
|
||||||
|
|
||||||
class E_novine(BasicNewsRecipe):
|
class E_novine(BasicNewsRecipe):
|
||||||
title = 'E-Novine'
|
title = 'E-Novine'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'News from Serbia'
|
description = 'News from Serbia'
|
||||||
publisher = 'E-novine'
|
publisher = 'E-novine'
|
||||||
category = 'news, politics, Balcans'
|
category = 'news, politics, Balcans'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
encoding = 'cp1250'
|
encoding = 'cp1250'
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
language = 'sr'
|
language = 'sr'
|
||||||
|
|
||||||
lang = 'sr'
|
lang = 'sr'
|
||||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
|
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comment' : description
|
'comment' : description
|
||||||
, 'tags' : category
|
, 'tags' : category
|
||||||
, 'publisher' : publisher
|
, 'publisher' : publisher
|
||||||
, 'language' : lang
|
, 'language' : lang
|
||||||
, 'pretty_print' : True
|
, 'pretty_print' : True
|
||||||
}
|
}
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'id':['css_47_0_2844H']})]
|
keep_only_tags = [dict(name='div', attrs={'id':['css_47_0_2844H']})]
|
||||||
|
|
||||||
remove_tags = [dict(name=['object','link','embed','iframe'])]
|
remove_tags = [dict(name=['object','link','embed','iframe'])]
|
||||||
|
|
||||||
feeds = [(u'Sve vesti', u'http://www.e-novine.com/rss/e-novine.xml' )]
|
feeds = [(u'Sve vesti', u'http://www.e-novine.com/rss/e-novine.xml' )]
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
soup.html['xml:lang'] = self.lang
|
soup.html['xml:lang'] = self.lang
|
||||||
soup.html['lang'] = self.lang
|
soup.html['lang'] = self.lang
|
||||||
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
||||||
soup.head.insert(0,mlang)
|
soup.head.insert(0,mlang)
|
||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
del item['style']
|
del item['style']
|
||||||
ftag = soup.find('div', attrs={'id':'css_47_0_2844H'})
|
ftag = soup.find('div', attrs={'id':'css_47_0_2844H'})
|
||||||
if ftag:
|
if ftag:
|
||||||
it = ftag.div
|
it = ftag.div
|
||||||
it.extract()
|
it.extract()
|
||||||
ftag.div.extract()
|
ftag.div.extract()
|
||||||
ftag.insert(0,it)
|
ftag.insert(0,it)
|
||||||
return soup
|
return soup
|
||||||
|
@ -1,32 +1,32 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
EcoGeek.org
|
EcoGeek.org
|
||||||
'''
|
'''
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class EcoGeek(BasicNewsRecipe):
|
class EcoGeek(BasicNewsRecipe):
|
||||||
title = 'EcoGeek'
|
title = 'EcoGeek'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'EcoGeek - Technology for the Environment Blog Feed'
|
description = 'EcoGeek - Technology for the Environment Blog Feed'
|
||||||
publisher = 'EcoGeek'
|
publisher = 'EcoGeek'
|
||||||
language = 'en'
|
language = 'en'
|
||||||
|
|
||||||
category = 'news, ecology, blog'
|
category = 'news, ecology, blog'
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = True
|
use_embedded_content = True
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment', description
|
'--comment', description
|
||||||
, '--category', category
|
, '--category', category
|
||||||
, '--publisher', publisher
|
, '--publisher', publisher
|
||||||
]
|
]
|
||||||
|
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||||
|
|
||||||
feeds = [(u'Posts', u'http://feeds2.feedburner.com/EcoGeek')]
|
feeds = [(u'Posts', u'http://feeds2.feedburner.com/EcoGeek')]
|
||||||
|
@ -1,62 +1,61 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
emol.com
|
emol.com
|
||||||
'''
|
'''
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class ElMercurio(BasicNewsRecipe):
|
class ElMercurio(BasicNewsRecipe):
|
||||||
title = 'El Mercurio online'
|
title = 'El Mercurio online'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'El sitio de noticias online de Chile'
|
description = 'El sitio de noticias online de Chile'
|
||||||
publisher = 'El Mercurio'
|
publisher = 'El Mercurio'
|
||||||
category = 'news, politics, Chile'
|
category = 'news, politics, Chile'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = 'cp1252'
|
encoding = 'cp1252'
|
||||||
cover_url = 'http://www.emol.com/especiales/logo_emol/logo_emol.gif'
|
cover_url = 'http://www.emol.com/especiales/logo_emol/logo_emol.gif'
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment', description
|
'--comment', description
|
||||||
, '--category', category
|
, '--category', category
|
||||||
, '--publisher', publisher
|
, '--publisher', publisher
|
||||||
]
|
]
|
||||||
|
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name='div', attrs={'class':'despliegue-txt_750px'})
|
dict(name='div', attrs={'class':'despliegue-txt_750px'})
|
||||||
,dict(name='div', attrs={'id':'div_cuerpo_participa'})
|
,dict(name='div', attrs={'id':'div_cuerpo_participa'})
|
||||||
]
|
]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='div', attrs={'class':'contenedor_despliegue-col-left300'})
|
dict(name='div', attrs={'class':'contenedor_despliegue-col-left300'})
|
||||||
,dict(name='div', attrs={'id':['div_centro_dn_opc','div_cabezera','div_secciones','div_contenidos','div_pie','nav']})
|
,dict(name='div', attrs={'id':['div_centro_dn_opc','div_cabezera','div_secciones','div_contenidos','div_pie','nav']})
|
||||||
]
|
]
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Noticias de ultima hora', u'http://www.emol.com/rss20/rss.asp?canal=0')
|
(u'Noticias de ultima hora', u'http://www.emol.com/rss20/rss.asp?canal=0')
|
||||||
,(u'Nacional', u'http://www.emol.com/rss20/rss.asp?canal=1')
|
,(u'Nacional', u'http://www.emol.com/rss20/rss.asp?canal=1')
|
||||||
,(u'Mundo', u'http://www.emol.com/rss20/rss.asp?canal=2')
|
,(u'Mundo', u'http://www.emol.com/rss20/rss.asp?canal=2')
|
||||||
,(u'Deportes', u'http://www.emol.com/rss20/rss.asp?canal=4')
|
,(u'Deportes', u'http://www.emol.com/rss20/rss.asp?canal=4')
|
||||||
,(u'Magazine', u'http://www.emol.com/rss20/rss.asp?canal=6')
|
,(u'Magazine', u'http://www.emol.com/rss20/rss.asp?canal=6')
|
||||||
,(u'Tecnologia', u'http://www.emol.com/rss20/rss.asp?canal=5')
|
,(u'Tecnologia', u'http://www.emol.com/rss20/rss.asp?canal=5')
|
||||||
,(u'La Musica', u'http://www.emol.com/rss20/rss.asp?canal=7')
|
,(u'La Musica', u'http://www.emol.com/rss20/rss.asp?canal=7')
|
||||||
]
|
]
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
mtag = '<meta http-equiv="Content-Language" content="es-CL"/>'
|
mtag = '<meta http-equiv="Content-Language" content="es-CL"/>'
|
||||||
soup.head.insert(0,mtag)
|
soup.head.insert(0,mtag)
|
||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
del item['style']
|
del item['style']
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
language = 'es'
|
language = 'es'
|
||||||
|
|
@ -1,66 +1,66 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
eluniversal.com.mx
|
eluniversal.com.mx
|
||||||
'''
|
'''
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class ElUniversal(BasicNewsRecipe):
|
class ElUniversal(BasicNewsRecipe):
|
||||||
title = 'El Universal'
|
title = 'El Universal'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'News from Mexico'
|
description = 'News from Mexico'
|
||||||
oldest_article = 1
|
oldest_article = 1
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
publisher = 'El Universal'
|
publisher = 'El Universal'
|
||||||
category = 'news, politics, Mexico'
|
category = 'news, politics, Mexico'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = 'cp1252'
|
encoding = 'cp1252'
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
language = 'es'
|
language = 'es'
|
||||||
|
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment', description
|
'--comment', description
|
||||||
, '--category', category
|
, '--category', category
|
||||||
, '--publisher', publisher
|
, '--publisher', publisher
|
||||||
, '--ignore-tables'
|
, '--ignore-tables'
|
||||||
]
|
]
|
||||||
|
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
||||||
|
|
||||||
remove_tags = [dict(name='link')]
|
remove_tags = [dict(name='link')]
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Minuto por Minuto', u'http://www.eluniversal.com.mx/rss/universalmxm.xml' )
|
(u'Minuto por Minuto', u'http://www.eluniversal.com.mx/rss/universalmxm.xml' )
|
||||||
,(u'Mundo' , u'http://www.eluniversal.com.mx/rss/mundo.xml' )
|
,(u'Mundo' , u'http://www.eluniversal.com.mx/rss/mundo.xml' )
|
||||||
,(u'Mexico' , u'http://www.eluniversal.com.mx/rss/mexico.xml' )
|
,(u'Mexico' , u'http://www.eluniversal.com.mx/rss/mexico.xml' )
|
||||||
,(u'Estados' , u'http://www.eluniversal.com.mx/rss/estados.xml' )
|
,(u'Estados' , u'http://www.eluniversal.com.mx/rss/estados.xml' )
|
||||||
,(u'Finanzas' , u'http://www.eluniversal.com.mx/rss/finanzas.xml' )
|
,(u'Finanzas' , u'http://www.eluniversal.com.mx/rss/finanzas.xml' )
|
||||||
,(u'Deportes' , u'http://www.eluniversal.com.mx/rss/deportes.xml' )
|
,(u'Deportes' , u'http://www.eluniversal.com.mx/rss/deportes.xml' )
|
||||||
,(u'Espectaculos' , u'http://www.eluniversal.com.mx/rss/espectaculos.xml' )
|
,(u'Espectaculos' , u'http://www.eluniversal.com.mx/rss/espectaculos.xml' )
|
||||||
,(u'Cultura' , u'http://www.eluniversal.com.mx/rss/cultura.xml' )
|
,(u'Cultura' , u'http://www.eluniversal.com.mx/rss/cultura.xml' )
|
||||||
,(u'Ciencia' , u'http://www.eluniversal.com.mx/rss/ciencia.xml' )
|
,(u'Ciencia' , u'http://www.eluniversal.com.mx/rss/ciencia.xml' )
|
||||||
,(u'Computacion' , u'http://www.eluniversal.com.mx/rss/computo.xml' )
|
,(u'Computacion' , u'http://www.eluniversal.com.mx/rss/computo.xml' )
|
||||||
,(u'Sociedad' , u'http://www.eluniversal.com.mx/rss/sociedad.xml' )
|
,(u'Sociedad' , u'http://www.eluniversal.com.mx/rss/sociedad.xml' )
|
||||||
]
|
]
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
return url.replace('/notas/','/notas/vi_')
|
return url.replace('/notas/','/notas/vi_')
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
mtag = '<meta http-equiv="Content-Language" content="es-MX"/><meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
|
mtag = '<meta http-equiv="Content-Language" content="es-MX"/><meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
|
||||||
soup.head.insert(0,mtag)
|
soup.head.insert(0,mtag)
|
||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
del item['style']
|
del item['style']
|
||||||
for item in soup.findAll(font=True):
|
for item in soup.findAll(font=True):
|
||||||
del item['font']
|
del item['font']
|
||||||
for item in soup.findAll(face=True):
|
for item in soup.findAll(face=True):
|
||||||
del item['face']
|
del item['face']
|
||||||
for item in soup.findAll(helvetica=True):
|
for item in soup.findAll(helvetica=True):
|
||||||
del item['helvetica']
|
del item['helvetica']
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
|
@ -1,62 +1,62 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
elargentino.com
|
elargentino.com
|
||||||
'''
|
'''
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class ElArgentino(BasicNewsRecipe):
|
class ElArgentino(BasicNewsRecipe):
|
||||||
title = 'ElArgentino.com'
|
title = 'ElArgentino.com'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Informacion Libre las 24 horas'
|
description = 'Informacion Libre las 24 horas'
|
||||||
publisher = 'ElArgentino.com'
|
publisher = 'ElArgentino.com'
|
||||||
category = 'news, politics, Argentina'
|
category = 'news, politics, Argentina'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = 'utf8'
|
encoding = 'utf8'
|
||||||
cover_url = 'http://www.elargentino.com/TemplateWeb/MediosFooter/tapa_elargentino.png'
|
cover_url = 'http://www.elargentino.com/TemplateWeb/MediosFooter/tapa_elargentino.png'
|
||||||
language = 'es'
|
language = 'es'
|
||||||
|
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment', description
|
'--comment', description
|
||||||
, '--category', category
|
, '--category', category
|
||||||
, '--publisher', publisher
|
, '--publisher', publisher
|
||||||
]
|
]
|
||||||
|
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='div', attrs={'id':'noprint' })
|
dict(name='div', attrs={'id':'noprint' })
|
||||||
,dict(name='div', attrs={'class':'encabezadoImprimir'})
|
,dict(name='div', attrs={'class':'encabezadoImprimir'})
|
||||||
,dict(name='a' , attrs={'target':'_blank' })
|
,dict(name='a' , attrs={'target':'_blank' })
|
||||||
]
|
]
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Portada' , u'http://www.elargentino.com/Highlights.aspx?Content-Type=text/xml&ChannelDesc=Home' )
|
(u'Portada' , u'http://www.elargentino.com/Highlights.aspx?Content-Type=text/xml&ChannelDesc=Home' )
|
||||||
,(u'Pais' , u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=112&Content-Type=text/xml&ChannelDesc=Pa%C3%ADs' )
|
,(u'Pais' , u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=112&Content-Type=text/xml&ChannelDesc=Pa%C3%ADs' )
|
||||||
,(u'Economia' , u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=107&Content-Type=text/xml&ChannelDesc=Econom%C3%ADa' )
|
,(u'Economia' , u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=107&Content-Type=text/xml&ChannelDesc=Econom%C3%ADa' )
|
||||||
,(u'Mundo' , u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=113&Content-Type=text/xml&ChannelDesc=Mundo' )
|
,(u'Mundo' , u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=113&Content-Type=text/xml&ChannelDesc=Mundo' )
|
||||||
,(u'Tecnologia' , u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=118&Content-Type=text/xml&ChannelDesc=Tecnolog%C3%ADa' )
|
,(u'Tecnologia' , u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=118&Content-Type=text/xml&ChannelDesc=Tecnolog%C3%ADa' )
|
||||||
,(u'Espectaculos', u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=114&Content-Type=text/xml&ChannelDesc=Espect%C3%A1culos')
|
,(u'Espectaculos', u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=114&Content-Type=text/xml&ChannelDesc=Espect%C3%A1culos')
|
||||||
,(u'Deportes' , u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=106&Content-Type=text/xml&ChannelDesc=Deportes' )
|
,(u'Deportes' , u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=106&Content-Type=text/xml&ChannelDesc=Deportes' )
|
||||||
,(u'Sociedad' , u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=109&Content-Type=text/xml&ChannelDesc=Sociedad' )
|
,(u'Sociedad' , u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=109&Content-Type=text/xml&ChannelDesc=Sociedad' )
|
||||||
,(u'Entrevistas' , u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=115&Content-Type=text/xml&ChannelDesc=Entrevistas' )
|
,(u'Entrevistas' , u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=115&Content-Type=text/xml&ChannelDesc=Entrevistas' )
|
||||||
]
|
]
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
main, sep, article_part = url.partition('/nota-')
|
main, sep, article_part = url.partition('/nota-')
|
||||||
article_id, rsep, rrest = article_part.partition('-')
|
article_id, rsep, rrest = article_part.partition('-')
|
||||||
return u'http://www.elargentino.com/Impresion.aspx?Id=' + article_id
|
return u'http://www.elargentino.com/Impresion.aspx?Id=' + article_id
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">\n<meta http-equiv="Content-Language" content="es-AR"/>\n'
|
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">\n<meta http-equiv="Content-Language" content="es-AR"/>\n'
|
||||||
soup.head.insert(0,mtag)
|
soup.head.insert(0,mtag)
|
||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
del item['style']
|
del item['style']
|
||||||
return soup
|
return soup
|
||||||
|
@ -1,72 +1,72 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
cronista.com
|
cronista.com
|
||||||
'''
|
'''
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class ElCronista(BasicNewsRecipe):
|
class ElCronista(BasicNewsRecipe):
|
||||||
title = 'El Cronista'
|
title = 'El Cronista'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Noticias de Argentina'
|
description = 'Noticias de Argentina'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
language = 'es'
|
language = 'es'
|
||||||
|
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = 'cp1252'
|
encoding = 'cp1252'
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment' , description
|
'--comment' , description
|
||||||
, '--category' , 'news, Argentina'
|
, '--category' , 'news, Argentina'
|
||||||
, '--publisher' , title
|
, '--publisher' , title
|
||||||
]
|
]
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name='table', attrs={'width':'100%' })
|
dict(name='table', attrs={'width':'100%' })
|
||||||
,dict(name='h1' , attrs={'class':'Arialgris16normal'})
|
,dict(name='h1' , attrs={'class':'Arialgris16normal'})
|
||||||
]
|
]
|
||||||
|
|
||||||
remove_tags = [dict(name='a', attrs={'class':'Arialazul12'})]
|
remove_tags = [dict(name='a', attrs={'class':'Arialazul12'})]
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Economia' , u'http://www.cronista.com/adjuntos/8/rss/Economia_EI.xml' )
|
(u'Economia' , u'http://www.cronista.com/adjuntos/8/rss/Economia_EI.xml' )
|
||||||
,(u'Negocios' , u'http://www.cronista.com/adjuntos/8/rss/negocios_EI.xml' )
|
,(u'Negocios' , u'http://www.cronista.com/adjuntos/8/rss/negocios_EI.xml' )
|
||||||
,(u'Ultimo momento' , u'http://www.cronista.com/adjuntos/8/rss/ultimo_momento.xml' )
|
,(u'Ultimo momento' , u'http://www.cronista.com/adjuntos/8/rss/ultimo_momento.xml' )
|
||||||
,(u'Finanzas y Mercados' , u'http://www.cronista.com/adjuntos/8/rss/Finanzas_Mercados_EI.xml' )
|
,(u'Finanzas y Mercados' , u'http://www.cronista.com/adjuntos/8/rss/Finanzas_Mercados_EI.xml' )
|
||||||
,(u'Financial Times' , u'http://www.cronista.com/adjuntos/8/rss/FT_EI.xml' )
|
,(u'Financial Times' , u'http://www.cronista.com/adjuntos/8/rss/FT_EI.xml' )
|
||||||
,(u'Opinion edicion impresa' , u'http://www.cronista.com/adjuntos/8/rss/opinion_edicion_impresa.xml' )
|
,(u'Opinion edicion impresa' , u'http://www.cronista.com/adjuntos/8/rss/opinion_edicion_impresa.xml' )
|
||||||
,(u'Socialmente Responsables', u'http://www.cronista.com/adjuntos/8/rss/Socialmente_Responsables.xml')
|
,(u'Socialmente Responsables', u'http://www.cronista.com/adjuntos/8/rss/Socialmente_Responsables.xml')
|
||||||
,(u'Asuntos Legales' , u'http://www.cronista.com/adjuntos/8/rss/asuntoslegales.xml' )
|
,(u'Asuntos Legales' , u'http://www.cronista.com/adjuntos/8/rss/asuntoslegales.xml' )
|
||||||
,(u'IT Business' , u'http://www.cronista.com/adjuntos/8/rss/itbusiness.xml' )
|
,(u'IT Business' , u'http://www.cronista.com/adjuntos/8/rss/itbusiness.xml' )
|
||||||
,(u'Management y RR.HH.' , u'http://www.cronista.com/adjuntos/8/rss/management.xml' )
|
,(u'Management y RR.HH.' , u'http://www.cronista.com/adjuntos/8/rss/management.xml' )
|
||||||
,(u'Inversiones Personales' , u'http://www.cronista.com/adjuntos/8/rss/inversionespersonales.xml' )
|
,(u'Inversiones Personales' , u'http://www.cronista.com/adjuntos/8/rss/inversionespersonales.xml' )
|
||||||
]
|
]
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
main, sep, rest = url.partition('.com/notas/')
|
main, sep, rest = url.partition('.com/notas/')
|
||||||
article_id, lsep, rrest = rest.partition('-')
|
article_id, lsep, rrest = rest.partition('-')
|
||||||
return 'http://www.cronista.com/interior/index.php?p=imprimir_nota&idNota=' + article_id
|
return 'http://www.cronista.com/interior/index.php?p=imprimir_nota&idNota=' + article_id
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
|
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
|
||||||
soup.head.insert(0,mtag)
|
soup.head.insert(0,mtag)
|
||||||
soup.head.base.extract()
|
soup.head.base.extract()
|
||||||
htext = soup.find('h1',attrs={'class':'Arialgris16normal'})
|
htext = soup.find('h1',attrs={'class':'Arialgris16normal'})
|
||||||
htext.name = 'p'
|
htext.name = 'p'
|
||||||
soup.prettify()
|
soup.prettify()
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
def get_cover_url(self):
|
def get_cover_url(self):
|
||||||
cover_url = None
|
cover_url = None
|
||||||
index = 'http://www.cronista.com/contenidos/'
|
index = 'http://www.cronista.com/contenidos/'
|
||||||
soup = self.index_to_soup(index + 'ee.html')
|
soup = self.index_to_soup(index + 'ee.html')
|
||||||
link_item = soup.find('a',attrs={'href':"javascript:Close()"})
|
link_item = soup.find('a',attrs={'href':"javascript:Close()"})
|
||||||
if link_item:
|
if link_item:
|
||||||
cover_url = index + link_item.img['src']
|
cover_url = index + link_item.img['src']
|
||||||
return cover_url
|
return cover_url
|
||||||
|
|
||||||
|
@ -1,61 +1,60 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
elmundo.es
|
elmundo.es
|
||||||
'''
|
'''
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class ElMundo(BasicNewsRecipe):
|
class ElMundo(BasicNewsRecipe):
|
||||||
title = 'El Mundo'
|
title = 'El Mundo'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'News from Spain'
|
description = 'News from Spain'
|
||||||
publisher = 'El Mundo'
|
publisher = 'El Mundo'
|
||||||
category = 'news, politics, Spain'
|
category = 'news, politics, Spain'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = 'iso8859_15'
|
encoding = 'iso8859_15'
|
||||||
cover_url = 'http://estaticos02.cache.el-mundo.net/papel/imagenes/v2.0/logoverde.gif'
|
cover_url = 'http://estaticos02.cache.el-mundo.net/papel/imagenes/v2.0/logoverde.gif'
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment', description
|
'--comment', description
|
||||||
, '--category', category
|
, '--category', category
|
||||||
, '--publisher', publisher
|
, '--publisher', publisher
|
||||||
]
|
]
|
||||||
|
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||||
|
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name='div', attrs={'id':['bloqueprincipal','noticia']})
|
dict(name='div', attrs={'id':['bloqueprincipal','noticia']})
|
||||||
,dict(name='div', attrs={'class':['contenido_noticia_01']})
|
,dict(name='div', attrs={'class':['contenido_noticia_01']})
|
||||||
]
|
]
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='div', attrs={'class':['herramientas','publicidad_google']})
|
dict(name='div', attrs={'class':['herramientas','publicidad_google']})
|
||||||
,dict(name='div', attrs={'id':'modulo_multimedia' })
|
,dict(name='div', attrs={'id':'modulo_multimedia' })
|
||||||
,dict(name='ul', attrs={'class':'herramientas' })
|
,dict(name='ul', attrs={'class':'herramientas' })
|
||||||
,dict(name=['object','link'])
|
,dict(name=['object','link'])
|
||||||
]
|
]
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Portada' , u'http://rss.elmundo.es/rss/descarga.htm?data2=4' )
|
(u'Portada' , u'http://rss.elmundo.es/rss/descarga.htm?data2=4' )
|
||||||
,(u'Espana' , u'http://rss.elmundo.es/rss/descarga.htm?data2=8' )
|
,(u'Espana' , u'http://rss.elmundo.es/rss/descarga.htm?data2=8' )
|
||||||
,(u'Internacional' , u'http://rss.elmundo.es/rss/descarga.htm?data2=9' )
|
,(u'Internacional' , u'http://rss.elmundo.es/rss/descarga.htm?data2=9' )
|
||||||
,(u'Cultura' , u'http://rss.elmundo.es/rss/descarga.htm?data2=6' )
|
,(u'Cultura' , u'http://rss.elmundo.es/rss/descarga.htm?data2=6' )
|
||||||
,(u'Ciencia/Ecologia', u'http://rss.elmundo.es/rss/descarga.htm?data2=5' )
|
,(u'Ciencia/Ecologia', u'http://rss.elmundo.es/rss/descarga.htm?data2=5' )
|
||||||
,(u'Comunicacion' , u'http://rss.elmundo.es/rss/descarga.htm?data2=26')
|
,(u'Comunicacion' , u'http://rss.elmundo.es/rss/descarga.htm?data2=26')
|
||||||
,(u'Television' , u'http://rss.elmundo.es/rss/descarga.htm?data2=76')
|
,(u'Television' , u'http://rss.elmundo.es/rss/descarga.htm?data2=76')
|
||||||
]
|
]
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
del item['style']
|
del item['style']
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
language = 'es'
|
language = 'es'
|
||||||
|
|
@ -1,56 +1,56 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
elperiodico.cat
|
elperiodico.cat
|
||||||
'''
|
'''
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import Tag
|
from calibre.ebooks.BeautifulSoup import Tag
|
||||||
|
|
||||||
class ElPeriodico_cat(BasicNewsRecipe):
|
class ElPeriodico_cat(BasicNewsRecipe):
|
||||||
title = 'El Periodico de Catalunya'
|
title = 'El Periodico de Catalunya'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Noticias desde Catalunya'
|
description = 'Noticias desde Catalunya'
|
||||||
publisher = 'elperiodico.cat'
|
publisher = 'elperiodico.cat'
|
||||||
category = 'news, politics, Spain, Catalunya'
|
category = 'news, politics, Spain, Catalunya'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
delay = 1
|
delay = 1
|
||||||
encoding = 'cp1252'
|
encoding = 'cp1252'
|
||||||
language = 'ca'
|
language = 'ca'
|
||||||
|
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment' , description
|
'--comment' , description
|
||||||
, '--category' , category
|
, '--category' , category
|
||||||
, '--publisher', publisher
|
, '--publisher', publisher
|
||||||
]
|
]
|
||||||
|
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||||
|
|
||||||
feeds = [(u"Tota l'edició", u'http://www.elperiodico.cat/rss.asp?id=46')]
|
feeds = [(u"Tota l'edició", u'http://www.elperiodico.cat/rss.asp?id=46')]
|
||||||
|
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'id':'noticia'})]
|
keep_only_tags = [dict(name='div', attrs={'id':'noticia'})]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name=['object','link','script'])
|
dict(name=['object','link','script'])
|
||||||
,dict(name='ul',attrs={'class':'herramientasDeNoticia'})
|
,dict(name='ul',attrs={'class':'herramientasDeNoticia'})
|
||||||
,dict(name='div', attrs={'id':'inferiores'})
|
,dict(name='div', attrs={'id':'inferiores'})
|
||||||
]
|
]
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
return url.replace('/default.asp?','/print.asp?')
|
return url.replace('/default.asp?','/print.asp?')
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
|
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
|
||||||
soup.head.insert(0,mcharset)
|
soup.head.insert(0,mcharset)
|
||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
del item['style']
|
del item['style']
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
|
@ -1,56 +1,56 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
elperiodico.com
|
elperiodico.com
|
||||||
'''
|
'''
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import Tag
|
from calibre.ebooks.BeautifulSoup import Tag
|
||||||
|
|
||||||
class ElPeriodico_esp(BasicNewsRecipe):
|
class ElPeriodico_esp(BasicNewsRecipe):
|
||||||
title = 'El Periodico de Catalunya'
|
title = 'El Periodico de Catalunya'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Noticias desde Catalunya'
|
description = 'Noticias desde Catalunya'
|
||||||
publisher = 'elperiodico.com'
|
publisher = 'elperiodico.com'
|
||||||
category = 'news, politics, Spain, Catalunya'
|
category = 'news, politics, Spain, Catalunya'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
delay = 1
|
delay = 1
|
||||||
encoding = 'cp1252'
|
encoding = 'cp1252'
|
||||||
language = 'es'
|
language = 'es'
|
||||||
|
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment' , description
|
'--comment' , description
|
||||||
, '--category' , category
|
, '--category' , category
|
||||||
, '--publisher', publisher
|
, '--publisher', publisher
|
||||||
]
|
]
|
||||||
|
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||||
|
|
||||||
feeds = [(u"Toda la edición", u'http://www.elperiodico.com/rss.asp?id=46')]
|
feeds = [(u"Toda la edición", u'http://www.elperiodico.com/rss.asp?id=46')]
|
||||||
|
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'id':'noticia'})]
|
keep_only_tags = [dict(name='div', attrs={'id':'noticia'})]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name=['object','link','script'])
|
dict(name=['object','link','script'])
|
||||||
,dict(name='ul',attrs={'class':'herramientasDeNoticia'})
|
,dict(name='ul',attrs={'class':'herramientasDeNoticia'})
|
||||||
,dict(name='div', attrs={'id':'inferiores'})
|
,dict(name='div', attrs={'id':'inferiores'})
|
||||||
]
|
]
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
return url.replace('/default.asp?','/print.asp?')
|
return url.replace('/default.asp?','/print.asp?')
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
|
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
|
||||||
soup.head.insert(0,mcharset)
|
soup.head.insert(0,mcharset)
|
||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
del item['style']
|
del item['style']
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
|
@ -1,53 +1,53 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
www.tiempo.hn
|
www.tiempo.hn
|
||||||
'''
|
'''
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
from calibre.ebooks.BeautifulSoup import Tag
|
||||||
|
|
||||||
class ElTiempoHn(BasicNewsRecipe):
|
class ElTiempoHn(BasicNewsRecipe):
|
||||||
title = 'El Tiempo - Honduras'
|
title = 'El Tiempo - Honduras'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Noticias de Honduras y mundo'
|
description = 'Noticias de Honduras y mundo'
|
||||||
publisher = 'El Tiempo'
|
publisher = 'El Tiempo'
|
||||||
category = 'news, politics, Honduras'
|
category = 'news, politics, Honduras'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
language = 'es'
|
language = 'es'
|
||||||
|
|
||||||
lang = 'es-HN'
|
lang = 'es-HN'
|
||||||
direction = 'ltr'
|
direction = 'ltr'
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment', description
|
'--comment', description
|
||||||
, '--category', category
|
, '--category', category
|
||||||
, '--publisher', publisher
|
, '--publisher', publisher
|
||||||
, '--ignore-tables'
|
, '--ignore-tables'
|
||||||
]
|
]
|
||||||
|
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True\npretty_print=True\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} img {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em}"'
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True\npretty_print=True\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} img {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em}"'
|
||||||
|
|
||||||
remove_tags = [dict(name=['form','object','embed','base'])]
|
remove_tags = [dict(name=['form','object','embed','base'])]
|
||||||
|
|
||||||
keep_only_tags = [dict(name='td' , attrs={'id':'mainbodycont'})]
|
keep_only_tags = [dict(name='td' , attrs={'id':'mainbodycont'})]
|
||||||
|
|
||||||
feeds = [(u'Noticias', u'http://www.tiempo.hn/index.php?format=feed&type=rss')]
|
feeds = [(u'Noticias', u'http://www.tiempo.hn/index.php?format=feed&type=rss')]
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
soup.html['lang'] = self.lang
|
soup.html['lang'] = self.lang
|
||||||
soup.html['dir' ] = self.direction
|
soup.html['dir' ] = self.direction
|
||||||
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
||||||
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
|
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
|
||||||
soup.head.insert(0,mlang)
|
soup.head.insert(0,mlang)
|
||||||
soup.head.insert(1,mcharset)
|
soup.head.insert(1,mcharset)
|
||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
del item['style']
|
del item['style']
|
||||||
return self.adeify_images(soup)
|
return self.adeify_images(soup)
|
||||||
|
@ -1,32 +1,31 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
engadget.com
|
engadget.com
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import string,re
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
class Engadget(BasicNewsRecipe):
|
||||||
class Engadget(BasicNewsRecipe):
|
title = u'Engadget'
|
||||||
title = u'Engadget'
|
__author__ = 'Darko Miletic'
|
||||||
__author__ = 'Darko Miletic'
|
description = 'Tech news'
|
||||||
description = 'Tech news'
|
|
||||||
language = 'en'
|
language = 'en'
|
||||||
|
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
|
|
||||||
keep_only_tags = [ dict(name='div', attrs={'class':'post'}) ]
|
keep_only_tags = [ dict(name='div', attrs={'class':'post'}) ]
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='object')
|
dict(name='object')
|
||||||
,dict(name='div', attrs={'class':'postmeta'})
|
,dict(name='div', attrs={'class':'postmeta'})
|
||||||
,dict(name='div', attrs={'class':'quigoads'})
|
,dict(name='div', attrs={'class':'quigoads'})
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
feeds = [ (u'Posts', u'http://www.engadget.com/rss.xml')]
|
feeds = [ (u'Posts', u'http://www.engadget.com/rss.xml')]
|
||||||
|
|
||||||
|
@ -1,63 +1,63 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
|
||||||
'''
|
'''
|
||||||
www.esquire.com
|
www.esquire.com
|
||||||
'''
|
'''
|
||||||
|
|
||||||
from calibre import strftime
|
from calibre import strftime
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import Tag
|
from calibre.ebooks.BeautifulSoup import Tag
|
||||||
|
|
||||||
class Esquire(BasicNewsRecipe):
|
class Esquire(BasicNewsRecipe):
|
||||||
title = 'Esquire'
|
title = 'Esquire'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Esquire: Man at His Best'
|
description = 'Esquire: Man at His Best'
|
||||||
publisher = 'Hearst Communications, Inc.'
|
publisher = 'Hearst Communications, Inc.'
|
||||||
category = 'magazine, men, women we love, style, the guide, sex, screen'
|
category = 'magazine, men, women we love, style, the guide, sex, screen'
|
||||||
oldest_article = 30
|
oldest_article = 30
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
encoding = 'cp1250'
|
encoding = 'cp1250'
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
language = 'en'
|
language = 'en'
|
||||||
|
|
||||||
lang = 'en-US'
|
lang = 'en-US'
|
||||||
cover_url = strftime('http://www.esquire.com/cm/esquire/cover-images/%Y_') + strftime('%m').strip('0') + '.jpg'
|
cover_url = strftime('http://www.esquire.com/cm/esquire/cover-images/%Y_') + strftime('%m').strip('0') + '.jpg'
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comment' : description
|
'comment' : description
|
||||||
, 'tags' : category
|
, 'tags' : category
|
||||||
, 'publisher' : publisher
|
, 'publisher' : publisher
|
||||||
, 'language' : lang
|
, 'language' : lang
|
||||||
, 'pretty_print' : True
|
, 'pretty_print' : True
|
||||||
}
|
}
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'id':'content'})]
|
keep_only_tags = [dict(name='div', attrs={'id':'content'})]
|
||||||
|
|
||||||
remove_tags = [dict(name=['object','link','embed','iframe'])]
|
remove_tags = [dict(name=['object','link','embed','iframe'])]
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Style' , u'http://www.esquire.com/style/rss/' )
|
(u'Style' , u'http://www.esquire.com/style/rss/' )
|
||||||
,(u'Women' , u'http://www.esquire.com/women/rss/' )
|
,(u'Women' , u'http://www.esquire.com/women/rss/' )
|
||||||
,(u'Features' , u'http://www.esquire.com/features/rss/' )
|
,(u'Features' , u'http://www.esquire.com/features/rss/' )
|
||||||
,(u'Fiction' , u'http://www.esquire.com/fiction/rss/' )
|
,(u'Fiction' , u'http://www.esquire.com/fiction/rss/' )
|
||||||
,(u'Frontpage', u'http://www.esquire.com/rss/' )
|
,(u'Frontpage', u'http://www.esquire.com/rss/' )
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
rest = url.rpartition('?')[0]
|
rest = url.rpartition('?')[0]
|
||||||
article = rest.rpartition('/')[2]
|
article = rest.rpartition('/')[2]
|
||||||
return 'http://www.esquire.com/print-this/' + article
|
return 'http://www.esquire.com/print-this/' + article
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
soup.html['xml:lang'] = self.lang
|
soup.html['xml:lang'] = self.lang
|
||||||
soup.html['lang'] = self.lang
|
soup.html['lang'] = self.lang
|
||||||
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
||||||
soup.head.insert(0,mlang)
|
soup.head.insert(0,mlang)
|
||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
del item['style']
|
del item['style']
|
||||||
return soup
|
return soup
|
||||||
|
@ -1,58 +1,58 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
exiledonline.com
|
exiledonline.com
|
||||||
'''
|
'''
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class Exiled(BasicNewsRecipe):
|
class Exiled(BasicNewsRecipe):
|
||||||
title = 'Exiled Online'
|
title = 'Exiled Online'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = "Mankind's only alternative since 1997 - Formerly known as The eXile"
|
description = "Mankind's only alternative since 1997 - Formerly known as The eXile"
|
||||||
publisher = 'Exiled Online'
|
publisher = 'Exiled Online'
|
||||||
category = 'news, politics, international'
|
category = 'news, politics, international'
|
||||||
oldest_article = 15
|
oldest_article = 15
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = 'utf8'
|
encoding = 'utf8'
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
language = 'en'
|
language = 'en'
|
||||||
|
|
||||||
cover_url = 'http://exiledonline.com/wp-content/themes/exiledonline_theme/images/header-sm.gif'
|
cover_url = 'http://exiledonline.com/wp-content/themes/exiledonline_theme/images/header-sm.gif'
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment' , description
|
'--comment' , description
|
||||||
, '--base-font-size', '10'
|
, '--base-font-size', '10'
|
||||||
, '--category' , category
|
, '--category' , category
|
||||||
, '--publisher' , publisher
|
, '--publisher' , publisher
|
||||||
]
|
]
|
||||||
|
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'id':'main'})]
|
keep_only_tags = [dict(name='div', attrs={'id':'main'})]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name=['object','link'])
|
dict(name=['object','link'])
|
||||||
,dict(name='div', attrs={'class':'info'})
|
,dict(name='div', attrs={'class':'info'})
|
||||||
,dict(name='div', attrs={'id':['comments','navig']})
|
,dict(name='div', attrs={'id':['comments','navig']})
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
feeds = [(u'Articles', u'http://exiledonline.com/feed/')]
|
feeds = [(u'Articles', u'http://exiledonline.com/feed/')]
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
del item['style']
|
del item['style']
|
||||||
mtag = '\n<meta http-equiv="Content-Language" content="en"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">\n'
|
mtag = '\n<meta http-equiv="Content-Language" content="en"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">\n'
|
||||||
soup.head.insert(0,mtag)
|
soup.head.insert(0,mtag)
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
def get_article_url(self, article):
|
def get_article_url(self, article):
|
||||||
raw = article.get('link', None)
|
raw = article.get('link', None)
|
||||||
final = raw + 'all/1/'
|
final = raw + 'all/1/'
|
||||||
return final
|
return final
|
||||||
|
|
||||||
|
@ -1,59 +1,59 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
www.expansion.com
|
www.expansion.com
|
||||||
'''
|
'''
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import Tag
|
from calibre.ebooks.BeautifulSoup import Tag
|
||||||
|
|
||||||
class Expansion(BasicNewsRecipe):
|
class Expansion(BasicNewsRecipe):
|
||||||
title = 'Diario Expansion'
|
title = 'Diario Expansion'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Lider de informacion de mercados, economica y politica'
|
description = 'Lider de informacion de mercados, economica y politica'
|
||||||
publisher = 'expansion.com'
|
publisher = 'expansion.com'
|
||||||
category = 'news, politics, Spain'
|
category = 'news, politics, Spain'
|
||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
delay = 1
|
delay = 1
|
||||||
encoding = 'iso-8859-15'
|
encoding = 'iso-8859-15'
|
||||||
language = 'es'
|
language = 'es'
|
||||||
|
|
||||||
direction = 'ltr'
|
direction = 'ltr'
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment' , description
|
'--comment' , description
|
||||||
, '--category' , category
|
, '--category' , category
|
||||||
, '--publisher', publisher
|
, '--publisher', publisher
|
||||||
]
|
]
|
||||||
|
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Ultimas noticias', u'http://rss.expansion.com/rss/descarga.htm?data2=178')
|
(u'Ultimas noticias', u'http://rss.expansion.com/rss/descarga.htm?data2=178')
|
||||||
,(u'Temas del dia' , u'http://rss.expansion.com/rss/descarga.htm?data2=178')
|
,(u'Temas del dia' , u'http://rss.expansion.com/rss/descarga.htm?data2=178')
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'id':'principal'})]
|
keep_only_tags = [dict(name='div', attrs={'id':'principal'})]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name=['object','link','script'])
|
dict(name=['object','link','script'])
|
||||||
,dict(name='div', attrs={'class':['utilidades','tit_relacionadas']})
|
,dict(name='div', attrs={'class':['utilidades','tit_relacionadas']})
|
||||||
]
|
]
|
||||||
|
|
||||||
remove_tags_after = [dict(name='div', attrs={'class':'tit_relacionadas'})]
|
remove_tags_after = [dict(name='div', attrs={'class':'tit_relacionadas'})]
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
soup.html['dir' ] = self.direction
|
soup.html['dir' ] = self.direction
|
||||||
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
|
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
|
||||||
soup.head.insert(0,mcharset)
|
soup.head.insert(0,mcharset)
|
||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
del item['style']
|
del item['style']
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
|
@ -1,55 +1,55 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
www.fastcompany.com
|
www.fastcompany.com
|
||||||
'''
|
'''
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import Tag
|
from calibre.ebooks.BeautifulSoup import Tag
|
||||||
|
|
||||||
class FastCompany(BasicNewsRecipe):
|
class FastCompany(BasicNewsRecipe):
|
||||||
title = 'Fast Company'
|
title = 'Fast Company'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Where ideas and people meet'
|
description = 'Where ideas and people meet'
|
||||||
publisher = 'fastcompany.com'
|
publisher = 'fastcompany.com'
|
||||||
category = 'news, technology, gadgets, games'
|
category = 'news, technology, gadgets, games'
|
||||||
oldest_article = 15
|
oldest_article = 15
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = True
|
use_embedded_content = True
|
||||||
simultaneous_downloads = 1
|
simultaneous_downloads = 1
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
lang = 'en'
|
lang = 'en'
|
||||||
language = 'en'
|
language = 'en'
|
||||||
|
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment', description
|
'--comment', description
|
||||||
, '--category', category
|
, '--category', category
|
||||||
, '--publisher', publisher
|
, '--publisher', publisher
|
||||||
]
|
]
|
||||||
|
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||||
|
|
||||||
remove_tags = [dict(name=['embed','object']), dict(name='div',attrs={'class':'feedflare'})]
|
remove_tags = [dict(name=['embed','object']), dict(name='div',attrs={'class':'feedflare'})]
|
||||||
|
|
||||||
feeds = [(u'All News', u'http://feeds.feedburner.com/fastcompany/headlines')]
|
feeds = [(u'All News', u'http://feeds.feedburner.com/fastcompany/headlines')]
|
||||||
|
|
||||||
def get_article_url(self, article):
|
def get_article_url(self, article):
|
||||||
return article.get('guid', None)
|
return article.get('guid', None)
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
soup.html['xml:lang'] = self.lang
|
soup.html['xml:lang'] = self.lang
|
||||||
soup.html['lang'] = self.lang
|
soup.html['lang'] = self.lang
|
||||||
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
||||||
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
|
mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
|
||||||
soup.head.insert(0,mlang)
|
soup.head.insert(0,mlang)
|
||||||
soup.head.insert(1,mcharset)
|
soup.head.insert(1,mcharset)
|
||||||
for item in soup.findAll('a'):
|
for item in soup.findAll('a'):
|
||||||
sp = item['href'].find('http://feedads.g.doubleclick.net/')
|
sp = item['href'].find('http://feedads.g.doubleclick.net/')
|
||||||
if sp != -1:
|
if sp != -1:
|
||||||
item.extract()
|
item.extract()
|
||||||
return self.adeify_images(soup)
|
return self.adeify_images(soup)
|
||||||
|
|
||||||
|
@ -1,51 +1,51 @@
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008-2009, Kovid Goyal <kovid at kovidgoyal.net>, Darko Miletic <darko at gmail.com>'
|
__copyright__ = '2008-2009, Kovid Goyal <kovid at kovidgoyal.net>, Darko Miletic <darko at gmail.com>'
|
||||||
'''
|
'''
|
||||||
Profile to download FAZ.net
|
Profile to download FAZ.net
|
||||||
'''
|
'''
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class FazNet(BasicNewsRecipe):
|
class FazNet(BasicNewsRecipe):
|
||||||
title = 'FAZ NET'
|
title = 'FAZ NET'
|
||||||
__author__ = 'Kovid Goyal, Darko Miletic'
|
__author__ = 'Kovid Goyal, Darko Miletic'
|
||||||
description = 'Frankfurter Allgemeine Zeitung'
|
description = 'Frankfurter Allgemeine Zeitung'
|
||||||
publisher = 'FAZ Electronic Media GmbH'
|
publisher = 'FAZ Electronic Media GmbH'
|
||||||
category = 'news, politics, Germany'
|
category = 'news, politics, Germany'
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
language = 'de'
|
language = 'de'
|
||||||
|
|
||||||
max_articles_per_feed = 30
|
max_articles_per_feed = 30
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment', description
|
'--comment', description
|
||||||
, '--category', category
|
, '--category', category
|
||||||
, '--publisher', publisher
|
, '--publisher', publisher
|
||||||
]
|
]
|
||||||
|
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'class':'Article'})]
|
keep_only_tags = [dict(name='div', attrs={'class':'Article'})]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name=['object','link','embed','base'])
|
dict(name=['object','link','embed','base'])
|
||||||
,dict(name='div', attrs={'class':['LinkBoxModulSmall','ModulVerlagsInfo']})
|
,dict(name='div', attrs={'class':['LinkBoxModulSmall','ModulVerlagsInfo']})
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
feeds = [ ('FAZ.NET', 'http://www.faz.net/s/Rub/Tpl~Epartner~SRss_.xml') ]
|
feeds = [ ('FAZ.NET', 'http://www.faz.net/s/Rub/Tpl~Epartner~SRss_.xml') ]
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
article, sep, rest = url.partition('?')
|
article, sep, rest = url.partition('?')
|
||||||
return article.replace('.html', '~Afor~Eprint.html')
|
return article.replace('.html', '~Afor~Eprint.html')
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>'
|
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>'
|
||||||
soup.head.insert(0,mtag)
|
soup.head.insert(0,mtag)
|
||||||
del soup.body['onload']
|
del soup.body['onload']
|
||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
del item['style']
|
del item['style']
|
||||||
return soup
|
return soup
|
||||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user