Mirror of https://github.com/kovidgoyal/calibre.git (synced 2025-07-09 03:04:10 -04:00)
Implement a check setup command that uses PyFlakes to check for various errors
parent 792c6b0b22
commit f9ff180347
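The new command plugs into calibre's setup.py command dispatcher (see the setup/commands.py hunk below), so from a source checkout it should be invokable roughly as:

    python setup.py check

Based on the code added in this commit: the command walks src/calibre, runs PyFlakes over every .py file whose mtime has changed since the last run (results are cached in .check-cache.pickle), prints the warnings for the first problematic file, opens it in gvim, and exits with status 1.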
@@ -1,5 +1,5 @@
 *_ui.py
 moc_*.cpp
+.check-cache.pickle
 src/calibre/plugins
 resources/images.qrc
 src/calibre/manual/.build/
setup.py (3 changes)
@@ -6,7 +6,6 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import sys, os, optparse

sys.path.insert(0, os.path.abspath(os.path.dirname(__file__)))
@@ -70,7 +69,7 @@ def main(args=sys.argv):
        command.clean()
        return 0

-    if opts.clean_all():
+    if opts.clean_all:
        for cmd in commands.__all__:
            prints('Cleaning', cmd)
            getattr(commands, cmd).clean()
setup/check.py (new file, 75 lines)
@@ -0,0 +1,75 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import with_statement

__license__   = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import sys, os, cPickle, subprocess
from operator import attrgetter
from setup import Command

def check_for_python_errors(filename, builtins):
    from pyflakes import checker, ast

    contents = open(filename, 'rb').read()

    try:
        tree = ast.parse(contents, filename)
    except:
        import traceback
        traceback.print_exc()
        try:
            value = sys.exc_info()[1]
            lineno, offset, line = value[1][1:]
        except IndexError:
            lineno, offset, line = 1, 0, ''
        if line.endswith("\n"):
            line = line[:-1]

        return [SyntaxError(filename, lineno, offset, str(value))]
    else:
        w = checker.Checker(tree, filename, builtins = builtins)
        w.messages.sort(key = attrgetter('lineno'))
        return w.messages


class Check(Command):

    BUILTINS = ['_', '__', 'dynamic_property', 'I', 'P']
    CACHE = '.check-cache.pickle'

    def run(self, opts):
        cache = {}
        if os.path.exists(self.CACHE):
            cache = cPickle.load(open(self.CACHE, 'rb'))
        for x in os.walk(self.j(self.SRC, 'calibre')):
            for f in x[-1]:
                f = self.j(x[0], f)
                mtime = os.stat(f).st_mtime
                if f.endswith('.py') and cache.get(f, 0) != mtime and \
                        self.b(f) not in ('ptempfile.py', 'feedparser.py',
                        'pyparsing.py', 'markdown.py') and 'genshi' not in f and \
                        'prs500/driver.py' not in f:
                    self.info('\tChecking', f)
                    w = check_for_python_errors(f, self.BUILTINS)
                    if w:
                        self.report_errors(w)
                        cPickle.dump(cache, open(self.CACHE, 'wb'), -1)
                        subprocess.call(['gvim', '-f', f])
                        raise SystemExit(1)
                    cache[f] = mtime
        cPickle.dump(cache, open(self.CACHE, 'wb'), -1)

    def report_errors(self, errors):
        for err in errors:
            if isinstance(err, SyntaxError):
                print '\t\tSyntax Error'
            else:
                col = getattr(err, 'col', 0) if getattr(err, 'col', 0) else 0
                lineno = err.lineno if err.lineno else 0
                self.info('\t\t%d:%d:'%(lineno, col),
                        err.message%err.message_args)
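For context, a minimal standalone sketch of the PyFlakes API that check_for_python_errors() drives above (assuming the same PyFlakes vintage, which still ships its own compiler-based ast wrapper; the helper name is hypothetical, not part of the commit):

    from pyflakes import checker, ast

    def pyflakes_warnings(filename):
        # Hypothetical helper: parse the source and hand the tree to
        # pyflakes' Checker, which collects messages such as unused
        # imports and undefined names.
        tree = ast.parse(open(filename, 'rb').read(), filename)
        w = checker.Checker(tree, filename)
        return sorted(w.messages, key=lambda m: m.lineno)

    for msg in pyflakes_warnings('setup.py'):
        print msg  # each message renders roughly as 'file:line: description'

The builtins keyword used in the real code extends the names the Checker treats as predefined; that is how calibre-injected globals like _ and dynamic_property avoid false "undefined name" reports.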
@@ -11,6 +11,7 @@ __all__ = [
        'build',
        'gui',
        'develop',
+       'check',
        ]

@@ -29,6 +30,8 @@ develop = Develop()
from setup.gui import GUI
gui = GUI()

+from setup.check import Check
+check = Check()

commands = {}
for x in __all__:
@@ -78,9 +78,10 @@ class GUI(Command):
                dat = pat.sub(sub, dat)

                if form.endswith('viewer%smain.ui'%os.sep):
-                    self.inf('\t\tPromoting WebView')
+                    self.info('\t\tPromoting WebView')
                    dat = dat.replace('self.view = QtWebKit.QWebView(', 'self.view = DocumentView(')
                    dat += '\n\nfrom calibre.gui2.viewer.documentview import DocumentView'
+                    dat += '\nQtWebKit'

                open(compiled_form, 'wb').write(dat)
@@ -21,6 +21,11 @@ from calibre.constants import iswindows, isosx, islinux, isfrozen, \
        filesystem_encoding
import mechanize

+if False:
+    winutil, winutilerror, __appname__, islinux, __version__
+    fcntl, win32event, isfrozen, __author__, terminal_controller
+    winerror, win32api
+
mimetypes.add_type('application/epub+zip', '.epub')
mimetypes.add_type('text/x-sony-bbeb+xml', '.lrs')
mimetypes.add_type('application/xhtml+xml', '.xhtml')
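The "if False:" block added here is the commit's idiom for silencing PyFlakes without changing behaviour: PyFlakes analyses the module statically, so the bare names count as uses of the conditionally-imported symbols, while at runtime the block never executes. The same trick appears as bare-name lines elsewhere in this commit, e.g. "OPF_MAP, HTML_MAP" and "mdx_footnotes, mdx_tables, mdx_toc" below.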
@@ -13,19 +13,19 @@
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301  USA
######################### END LICENSE BLOCK #########################

-from constants import eStart, eError, eItsMe
+from constants import eStart

class CodingStateMachine:
    def __init__(self, sm):
@@ -13,19 +13,19 @@
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301  USA
######################### END LICENSE BLOCK #########################

-import constants, sys
+import constants
from escsm import HZSMModel, ISO2022CNSMModel, ISO2022JPSMModel, ISO2022KRSMModel
from charsetprober import CharSetProber
from codingstatemachine import CodingStateMachine
@@ -75,5 +75,5 @@ class EscCharSetProber(CharSetProber):
                    self._mState = constants.eFoundIt
                    self._mDetectedCharset = codingSM.get_coding_state_machine()
                    return self.get_state()

        return self.get_state()
@@ -14,19 +14,19 @@
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301  USA
######################### END LICENSE BLOCK #########################

-import constants, sys
+import constants
from charsetgroupprober import CharSetGroupProber
from sbcharsetprober import SingleByteCharSetProber
from langcyrillicmodel import Win1251CyrillicModel, Koi8rModel, Latin5CyrillicModel, MacCyrillicModel, Ibm866Model, Ibm855Model
@@ -13,19 +13,19 @@
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301  USA
######################### END LICENSE BLOCK #########################

-import constants, sys
+import constants
from constants import eStart, eError, eItsMe
from charsetprober import CharSetProber
from codingstatemachine import CodingStateMachine
@@ -8,11 +8,10 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
__docformat__ = 'restructuredtext en'

-import os, re
-from itertools import count, chain
-from calibre.ebooks.oeb.base import XHTML, XHTML_NS
+import re
+from itertools import count
+from calibre.ebooks.oeb.base import XHTML_NS
from calibre.ebooks.oeb.base import OEBBook
from lxml import etree, html
from lxml.etree import XPath

NSMAP = {'h': XHTML_NS, 'html': XHTML_NS, 'xhtml': XHTML_NS}
@@ -55,5 +54,5 @@ def add_page_map(opfpath, opts):
        id = elem.attrib['id'] = idgen.next()
        href = '#'.join((item.href, id))
        oeb.pages.add(name, href)
-    writer = DirWriter(version='2.0', page_map=True)
+    writer = None#DirWriter(version='2.0', page_map=True)
    writer.dump(oeb, opfpath)
@@ -6,7 +6,6 @@ from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'

-import sys
from calibre import plugins

_lzx, _error = plugins['lzx']
@@ -7,3 +7,5 @@ Microsoft LIT tag and attribute tables.

from calibre.ebooks.lit.maps.opf import MAP as OPF_MAP
from calibre.ebooks.lit.maps.html import MAP as HTML_MAP
+
+OPF_MAP, HTML_MAP
@@ -1,14 +1,14 @@
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
-import sys, os
-from calibre import iswindows
+import os
from calibre.ptempfile import PersistentTemporaryFile

try:
    from PIL import ImageFont
+    ImageFont
except ImportError:
    import ImageFont

'''
Default fonts used in the PRS500
'''
@@ -48,11 +48,11 @@ def get_font_path(name):
    # then, try calibre shipped ones
    try:
        try:
            font_mod = __import__('calibre.ebooks.lrf.fonts.prs500', {}, {},
                    [fname], -1)
            getattr(font_mod, fname)
        except (ImportError, AttributeError):
            font_mod = __import__('calibre.ebooks.lrf.fonts.liberation', {}, {},
                    [LIBERATION_FONT_MAP[name]], -1)
        p = PersistentTemporaryFile('.ttf', 'font_')
        p.write(getattr(font_mod, fname).font_data)
@@ -61,7 +61,7 @@ def get_font_path(name):
        return p.name
    except ImportError:
        pass

    # finally, try system default ones
    if SYSTEM_FONT_MAP.has_key(name) and os.access(SYSTEM_FONT_MAP[name], os.R_OK):
        return SYSTEM_FONT_MAP[name]
@@ -71,7 +71,7 @@ def get_font_path(name):

def get_font(name, size, encoding='unic'):
    '''
    Get an ImageFont object by name.
    @param size: Font height in pixels. To convert from pts:
                 sz in pixels = (dpi/72) * size in pts
    @param encoding: Font encoding to use. E.g. 'unic', 'symbol', 'ADOB', 'ADBE', 'aprm'
@@ -94,7 +94,7 @@ NAME_MAP = {
    u'springgreen': u'#00FF7F',
    u'violet': u'#EE82EE',
    u'yellowgreen': u'#9ACD32'
    }

hex_pat = re.compile('#(\d{2})(\d{2})(\d{2})')
rgb_pat = re.compile('rgb\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\)', re.IGNORECASE)
@@ -109,5 +109,5 @@ def lrs_color(html_color):
    if hcol in NAME_MAP:
        return NAME_MAP[hcol].replace('#', '0x00')
    return '0x00000000'
@@ -10,13 +10,13 @@ from calibre.ebooks.lrf.lrfparser import LRFDocument
from calibre.ebooks.metadata.opf import OPFCreator

from calibre.ebooks.lrf.objects import PageAttr, BlockAttr, TextAttr

from calibre.ebooks.lrf.pylrs.pylrs import TextStyle

class BlockStyle(object):

    def __init__(self, ba):
        self.ba = ba

    def __str__(self):
        ans = '.'+str(self.ba.id)+' {\n'
        if hasattr(self.ba, 'sidemargin'):
@@ -37,10 +37,10 @@ class BlockStyle(object):
            ans += '\tbackground-color: %s;\n'%(self.ba.bgcolor.to_html())
        #TODO: Fixed size blocks
        return ans + '}\n'


class LRFConverter(object):

    def __init__(self, document, opts, logger):
        self.lrf = document
        self.opts = opts
@@ -48,15 +48,15 @@ class LRFConverter(object):
        self.logger = logger
        logger.info('Parsing LRF...')
        self.lrf.parse()

        self.create_metadata()
        self.create_styles()

    def create_metadata(self):
        self.logger.info('Reading metadata...')
        mi = get_metadata(self.lrf)
        self.opf = OPFCreator(self.output_dir, mi)

    def create_page_styles(self):
        self.page_css = ''
        for obj in self.lrf.objects.values():
@@ -65,21 +65,21 @@ class LRFConverter(object):
            self.page_css = selector + ' {\n'
            # TODO: Headers and footers
            self.page_css += '}\n'

    def create_block_styles(self):
        self.block_css = ''
        for obj in self.lrf.objects.values():
            if isinstance(obj, BlockAttr):
                self.block_css += str(BlockStyle(obj))

    def create_text_styles(self):
        self.text_css = ''
        for obj in self.lrf.objects.values():
            if isinstance(obj, TextAttr):
                self.text_css += str(TextStyle(obj))
        print self.text_css

    def create_styles(self):
        self.logger.info('Creating CSS stylesheet...')
        self.create_page_styles()
@@ -104,9 +104,9 @@ def process_file(lrfpath, opts, logger=None):
        raise ConversionError(opts.out + ' is not a directory')
    if not os.path.exists(opts.out):
        os.makedirs(opts.out)

    document = LRFDocument(open(lrfpath, 'rb'))
    conv = LRFConverter(document, opts, logger)

def main(args=sys.argv):
@@ -116,7 +116,7 @@ def main(args=sys.argv):
        parser.print_help()
        return 1
    process_file(args[1], opts)

    return 0
@@ -11,23 +11,23 @@ def ceil(num):
    return int(math.ceil(num))

def print_xml(elem):
    from calibre.ebooks.lrf.pylrs.pylrs import ElementWriter
    elem = elem.toElement('utf8')
    ew = ElementWriter(elem, sourceEncoding='utf8')
    ew.write(sys.stdout)
    print

def cattrs(base, extra):
    new = base.copy()
    new.update(extra)
    return new

def tokens(tb):
    '''
    Return the next token. A token is :
    1. A string
        a block of text that has the same style
    '''
    def process_element(x, attrs):
        if isinstance(x, CR):
            yield 2, None
@@ -49,22 +49,22 @@ def tokens(tb):
            for y in x.contents:
                for z in process_element(y, attrs):
                    yield z

    for i in tb.contents:
        if isinstance(i, CR):
            yield 1, None
        elif isinstance(i, Paragraph):
            for j in i.contents:
                attrs = {}
                if hasattr(j, 'attrs'):
                    attrs = j.attrs
                for k in process_element(j, attrs):
                    yield k


class Cell(object):

    def __init__(self, conv, tag, css):
        self.conv = conv
        self.tag = tag
@@ -89,7 +89,7 @@ class Cell(object):
            self.rowspan = int(tag['rowspan']) if tag.has_key('rowspan') else 1
        except:
            pass

        pp = conv.current_page
        conv.book.allow_new_page = False
        conv.current_page = conv.book.create_page()
@@ -99,7 +99,7 @@ class Cell(object):
            if isinstance(item, TextBlock):
                self.text_blocks.append(item)
        conv.current_page = pp
        conv.book.allow_new_page = True
        if not self.text_blocks:
            tb = conv.book.create_text_block()
            tb.Paragraph(' ')
@@ -107,7 +107,7 @@ class Cell(object):
        for tb in self.text_blocks:
            tb.parent = None
            tb.objId = 0
            # Needed as we have to eventually change this BlockStyle's width and
            # height attributes. This blockstyle may be shared with other
            # elements, so doing that causes havoc.
            tb.blockStyle = conv.book.create_block_style()
@@ -117,17 +117,17 @@ class Cell(object):
        if ts.attrs['align'] == 'foot':
            if isinstance(tb.contents[-1], Paragraph):
                tb.contents[-1].append(' ')

    def pts_to_pixels(self, pts):
        pts = int(pts)
        return ceil((float(self.conv.profile.dpi)/72.)*(pts/10.))

    def minimum_width(self):
        return max([self.minimum_tb_width(tb) for tb in self.text_blocks])

    def minimum_tb_width(self, tb):
        ts = tb.textStyle.attrs
        default_font = get_font(ts['fontfacename'], self.pts_to_pixels(ts['fontsize']))
@@ -135,7 +135,7 @@ class Cell(object):
        mwidth = 0
        for token, attrs in tokens(tb):
            font = default_font
            if isinstance(token, int): # Handle para and line breaks
                continue
            if isinstance(token, Plot):
                return self.pts_to_pixels(token.xsize)
@@ -151,24 +151,24 @@ class Cell(object):
                if width > mwidth:
                    mwidth = width
        return parindent + mwidth + 2

    def text_block_size(self, tb, maxwidth=sys.maxint, debug=False):
        ts = tb.textStyle.attrs
        default_font = get_font(ts['fontfacename'], self.pts_to_pixels(ts['fontsize']))
        parindent = self.pts_to_pixels(ts['parindent'])
        top, bottom, left, right = 0, 0, parindent, parindent

        def add_word(width, height, left, right, top, bottom, ls, ws):
            if left + width > maxwidth:
                left = width + ws
                top += ls
                bottom = top+ls if top+ls > bottom else bottom
            else:
                left += (width + ws)
                right = left if left > right else right
                bottom = top+ls if top+ls > bottom else bottom
            return left, right, top, bottom

        for token, attrs in tokens(tb):
            if attrs == None:
                attrs = {}
@@ -196,17 +196,17 @@ class Cell(object):
                width, height = font.getsize(word)
                left, right, top, bottom = add_word(width, height, left, right, top, bottom, ls, ws)
        return right+3+max(parindent, 10), bottom

    def text_block_preferred_width(self, tb, debug=False):
        return self.text_block_size(tb, sys.maxint, debug=debug)[0]

    def preferred_width(self, debug=False):
        return ceil(max([self.text_block_preferred_width(i, debug=debug) for i in self.text_blocks]))

    def height(self, width):
        return sum([self.text_block_size(i, width)[1] for i in self.text_blocks])


class Row(object):
    def __init__(self, conv, row, css, colpad):
@@ -221,15 +221,15 @@ class Row(object):
                name = a['name'] if a.has_key('name') else a['id'] if a.has_key('id') else None
                if name is not None:
                    self.targets.append(name.replace('#', ''))

    def number_of_cells(self):
        '''Number of cells in this row. Respects colspan'''
        ans = 0
        for cell in self.cells:
            ans += cell.colspan
        return ans

    def height(self, widths):
        i, heights = 0, []
        for cell in self.cells:
@@ -239,11 +239,11 @@ class Row(object):
        if not heights:
            return 0
        return max(heights)

    def cell_from_index(self, col):
        i = -1
        cell = None
        for cell in self.cells:
            for k in range(0, cell.colspan):
                if i == col:
                    break
@@ -251,30 +251,30 @@ class Row(object):
            if i == col:
                break
        return cell

    def minimum_width(self, col):
        cell = self.cell_from_index(col)
        if not cell:
            return 0
        return cell.minimum_width()

    def preferred_width(self, col):
        cell = self.cell_from_index(col)
        if not cell:
            return 0
        return 0 if cell.colspan > 1 else cell.preferred_width()

    def width_percent(self, col):
        cell = self.cell_from_index(col)
        if not cell:
            return -1
        return -1 if cell.colspan > 1 else cell.pwidth

    def cell_iterator(self):
        for c in self.cells:
            yield c


class Table(object):
    def __init__(self, conv, table, css, rowpad=10, colpad=10):
        self.rows = []
@@ -283,31 +283,31 @@ class Table(object):
        self.colpad = colpad
        rows = table.findAll('tr')
        conv.in_table = True
        for row in rows:
            rcss = conv.tag_css(row, css)[0]
            self.rows.append(Row(conv, row, rcss, colpad))
        conv.in_table = False

    def number_of_columns(self):
        max = 0
        for row in self.rows:
            max = row.number_of_cells() if row.number_of_cells() > max else max
        return max

    def number_or_rows(self):
        return len(self.rows)

    def height(self, maxwidth):
        ''' Return row heights + self.rowpad'''
        widths = self.get_widths(maxwidth)
        return sum([row.height(widths) + self.rowpad for row in self.rows]) - self.rowpad

    def minimum_width(self, col):
        return max([row.minimum_width(col) for row in self.rows])

    def width_percent(self, col):
        return max([row.width_percent(col) for row in self.rows])

    def get_widths(self, maxwidth):
        '''
        Return widths of columns + self.colpad
@@ -320,29 +320,29 @@ class Table(object):
                try:
                    cellwidths[r] = self.rows[r].preferred_width(c)
                except IndexError:
                    continue
            widths[c] = max(cellwidths)

        min_widths = [self.minimum_width(i)+10 for i in xrange(cols)]
        for i in xrange(len(widths)):
            wp = self.width_percent(i)
            if wp >= 0.:
                widths[i] = max(min_widths[i], ceil((wp/100.) * (maxwidth - (cols-1)*self.colpad)))

        itercount = 0

        while sum(widths) > maxwidth-((len(widths)-1)*self.colpad) and itercount < 100:
            for i in range(cols):
                widths[i] = ceil((95./100.)*widths[i]) if \
                        ceil((95./100.)*widths[i]) >= min_widths[i] else widths[i]
            itercount += 1

        return [i+self.colpad for i in widths]

    def blocks(self, maxwidth, maxheight):
        rows, cols = self.number_or_rows(), self.number_of_columns()
        cellmatrix = [[None for c in range(cols)] for r in range(rows)]
        rowpos = [0 for i in range(rows)]
        for r in range(rows):
            nc = self.rows[r].cell_iterator()
@@ -358,14 +358,14 @@ class Table(object):
                    break
                except StopIteration: # No more cells in this row
                    continue

        widths = self.get_widths(maxwidth)
        heights = [row.height(widths) for row in self.rows]

        xpos = [sum(widths[:i]) for i in range(cols)]
        delta = maxwidth - sum(widths)
        if delta < 0:
            delta = 0
        for r in range(len(cellmatrix)):
            yield None, 0, heights[r], 0, self.rows[r].targets
@@ -377,13 +377,13 @@ class Table(object):
                sypos = 0
                for tb in cell.text_blocks:
                    tb.blockStyle = self.conv.book.create_block_style(
                            blockwidth=width,
                            blockheight=cell.text_block_size(tb, width)[1],
                            blockrule='horz-fixed')

                    yield tb, xpos[c], sypos, delta, None
                    sypos += tb.blockStyle.attrs['blockheight']
@@ -1,81 +1,81 @@
""" elements.py -- replacements and helpers for ElementTree """

class ElementWriter(object):
    def __init__(self, e, header=False, sourceEncoding="ascii",
                 spaceBeforeClose=True, outputEncodingName="UTF-16"):
        self.header = header
        self.e = e
        self.sourceEncoding = sourceEncoding
        self.spaceBeforeClose = spaceBeforeClose
        self.outputEncodingName = outputEncodingName

    def _encodeCdata(self, rawText):
        if type(rawText) is str:
            rawText = rawText.decode(self.sourceEncoding)

        text = rawText.replace("&", "&amp;")
        text = text.replace("<", "&lt;")
        text = text.replace(">", "&gt;")
        return text

    def _writeAttribute(self, f, name, value):
        f.write(u' %s="' % unicode(name))
        if not isinstance(value, basestring):
            value = unicode(value)
        value = self._encodeCdata(value)
        value = value.replace('"', '&quot;')
        f.write(value)
        f.write(u'"')

    def _writeText(self, f, rawText):
        text = self._encodeCdata(rawText)
        f.write(text)

    def _write(self, f, e):
        f.write(u'<' + unicode(e.tag))

        attributes = e.items()
        attributes.sort()
        for name, value in attributes:
            self._writeAttribute(f, name, value)

        if e.text is not None or len(e) > 0:
            f.write(u'>')

            if e.text:
                self._writeText(f, e.text)

            for e2 in e:
                self._write(f, e2)

            f.write(u'</%s>' % e.tag)
        else:
            if self.spaceBeforeClose:
                f.write(' ')
            f.write(u'/>')

        if e.tail is not None:
            self._writeText(f, e.tail)

    def toString(self):
        class x:
            pass
        buffer = []
        x.write = buffer.append
        self.write(x)
        return u''.join(buffer)

    def write(self, f):
        if self.header:
            f.write(u'<?xml version="1.0" encoding="%s"?>\n' % self.outputEncodingName)

        self._write(f, self.e)
File diff suppressed because it is too large
@@ -1,43 +1,43 @@
def _optimize(tagList, tagName, conversion):
    # copy the tag of interest plus any text
    newTagList = []
    for tag in tagList:
        if tag.name == tagName or tag.name == "rawtext":
            newTagList.append(tag)

    # now, eliminate any duplicates (leaving the last one)
    for i, newTag in enumerate(newTagList[:-1]):
        if newTag.name == tagName and newTagList[i+1].name == tagName:
            tagList.remove(newTag)

    # eliminate redundant settings to same value across text strings
    newTagList = []
    for tag in tagList:
        if tag.name == tagName:
            newTagList.append(tag)

    for i, newTag in enumerate(newTagList[:-1]):
        value = conversion(newTag.parameter)
        nextValue = conversion(newTagList[i+1].parameter)
        if value == nextValue:
            tagList.remove(newTagList[i+1])

    # eliminate any setting that don't have text after them
    while len(tagList) > 0 and tagList[-1].name == tagName:
        del tagList[-1]


def tagListOptimizer(tagList):
    # this function eliminates redundant or unnecessary tags
    # it scans a list of tags, looking for text settings that are
    # changed before any text is output
    # for example,
    # fontsize=100, fontsize=200, text, fontsize=100, fontsize=200
    # should be:
    # fontsize=200 text
    oldSize = len(tagList)
    _optimize(tagList, "fontsize", int)
    _optimize(tagList, "fontweight", int)
    return oldSize - len(tagList)
File diff suppressed because it is too large
@@ -2,4 +2,6 @@
# Initialize extensions
from calibre.ebooks.markdown import mdx_footnotes
from calibre.ebooks.markdown import mdx_tables
from calibre.ebooks.markdown import mdx_toc
+
+mdx_footnotes, mdx_tables, mdx_toc
@@ -8,8 +8,6 @@ My markdown extensions for adding:
    Table of Contents (aka toc)
"""

-import os
-import sys
import re
import markdown
@@ -18,7 +16,7 @@ DEFAULT_TITLE = None
def extract_alphanumeric(in_str=None):
    """take alpha-numeric (7bit ascii) and return as a string
    """
    # I'm sure this is really inefficient and
    # could be done with a lambda/map()
    #x.strip().title().replace(' ', "")
    out_str=[]
@@ -42,7 +40,7 @@ class TocExtension (markdown.Extension):
    toc is returned in a div tag with class='toc'
    toc is either:
        appended to end of document
        OR
        replaces first string occurence of "///Table of Contents Goes Here///"
    """
@@ -75,7 +73,7 @@ class TocExtension (markdown.Extension):
        """
        Creates Table Of Contents based on headers.

        @returns: toc as a single as a dom element
                  in a <div> tag with class='toc'
        """
@@ -85,9 +83,9 @@ class TocExtension (markdown.Extension):
            if element.type=='element':
                if headers_compiled_re.match(element.nodeName):
                    return True

        headers_doc_list = doc.find(findHeadersFn)

        # Insert anchor tags into dom
        generated_anchor_id=0
        headers_list=[]
@@ -99,19 +97,19 @@ class TocExtension (markdown.Extension):
            if heading_type == self.auto_toc_heading_type:
                min_header_size_found=min(min_header_size_found,
                                          heading_type)

                html_anchor_name= (extract_alphanumeric(heading_title)
                                   +'__MD_autoTOC_%d' % (generated_anchor_id))

                # insert anchor tag inside header tags
                html_anchor = doc.createElement("a")
                html_anchor.setAttribute('name', html_anchor_name)
                element.appendChild(html_anchor)

                headers_list.append( (heading_type, heading_title,
                                      html_anchor_name) )
                generated_anchor_id = generated_anchor_id + 1

        # create dom for toc
        if headers_list != []:
            # Create list
@@ -125,9 +123,9 @@ class TocExtension (markdown.Extension):
            toc_doc_link.appendChild(toc_doc_text)
            toc_doc_entry.appendChild(toc_doc_link)
            toc_doc_list.appendChild(toc_doc_entry)

        # Put list into div
        div = doc.createElement("div")
        div.setAttribute('class', 'toc')
        if self.TOC_TITLE:
@@ -149,7 +147,7 @@ class TocPostprocessor (markdown.Postprocessor):

    def run(self, doc):
        tocPlaceholder = self.toc.findTocPlaceholder(doc)

        tocDiv = self.toc.createTocDiv(doc)
        if tocDiv:
            if tocPlaceholder :
@@ -2,7 +2,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Ashish Kulkarni <kulkarni.ashish@gmail.com>'
'''Read meta information from IMP files'''

-import sys, os
+import sys

from calibre.ebooks.metadata import MetaInformation, string_to_authors
@@ -17,7 +17,7 @@ def get_metadata(stream):
    if stream.read(10) not in MAGIC:
        print >>sys.stderr, u'Couldn\'t read IMP header from file'
        return mi

    def cString(skip=0):
        result = ''
        while 1:
@@ -30,7 +30,7 @@ def get_metadata(stream):

    stream.read(38) # skip past some uninteresting headers
    _, category, title, author = cString(), cString(), cString(1), cString(2)

    if title:
        mi.title = title
    if author:
@@ -7,7 +7,7 @@ __docformat__ = 'restructuredtext en'
Read metadata from LRX files
'''

-import sys, struct
+import struct
from zlib import decompress
from lxml import etree
@@ -33,7 +33,7 @@ def short_be(buf):
def get_metadata(f):
    read = lambda at, amount: _read(f, at, amount)
    f.seek(0)
    buf = f.read(12)
    if buf[4:] == 'ftypLRX2':
        offset = 0
        while True:
@@ -74,9 +74,9 @@ def get_metadata(f):
        mi.tags = [x.text for x in bi.findall('Category')]
        mi.language = root.find('DocInfo').find('Language').text
        return mi

    elif buf[4:8] == 'LRX':
        raise ValueError('Librie LRX format not supported')
    else:
        raise ValueError('Not a LRX file')
@@ -17,7 +17,7 @@
#
# Contributor(s):
#
-import zipfile, sys, re
+import zipfile, re
import xml.sax.saxutils
from cStringIO import StringIO
@@ -46,7 +46,7 @@ fields = {
}

def normalize(str):
    """
    The normalize-space function returns the argument string with whitespace
    normalized by stripping leading and trailing whitespace and replacing
    sequences of whitespace characters by a single space.
@@ -125,7 +125,7 @@ class odfmetaparser(xml.sax.saxutils.XMLGenerator):
        else:
            texttag = self._tag
        self.seenfields[texttag] = self.data()

        if field in self.deletefields:
            self.output.dowrite = True
        else:
@@ -140,7 +140,7 @@ class odfmetaparser(xml.sax.saxutils.XMLGenerator):

    def data(self):
        return normalize(''.join(self._data))

def get_metadata(stream):
    zin = zipfile.ZipFile(stream, 'r')
    odfs = odfmetaparser()
@@ -161,6 +161,6 @@ def get_metadata(stream):
        mi.language = data['language']
    if data.get('keywords', ''):
        mi.tags = data['keywords'].split(',')

    return mi
@@ -3,8 +3,8 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'

-import os
from zipfile import ZipFile
from cStringIO import StringIO

def get_metadata(stream):
@@ -20,5 +20,5 @@ def get_metadata(stream):
            stream = StringIO(zf.read(f))
            return get_metadata(stream, stream_type)
    raise ValueError('No ebook found in ZIP archive')
@@ -3,7 +3,6 @@
'''
Writer content to palmdoc pdb file.
'''
-import os

__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
@@ -4,7 +4,6 @@ __license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'

-import os

class zTXTError(Exception):
    pass
@@ -12,8 +12,6 @@ Decrypt content of PDF.
import os, sys
from optparse import OptionGroup, Option

-from calibre.ebooks.metadata.meta import metadata_from_formats
-from calibre.ebooks.metadata import authors_to_string
from calibre.utils.config import OptionParser
from calibre.utils.logging import Log
from calibre.constants import preferred_encoding
@@ -36,8 +34,8 @@ OPTIONS = set([

class DecryptionError(Exception):
    def __init__(self, pdf_path):
-        self.value = 'Unable to decrypt file `%s`.' % value
+        self.value = 'Unable to decrypt file `%s`.' % pdf_path

    def __str__(self):
        return repr(self.value)
@@ -62,20 +60,20 @@ def add_options(parser):
    group = OptionGroup(parser, _('Decrypt Options:'), _('Options to control the transformation of pdf'))
    parser.add_option_group(group)
    add_option = group.add_option

    for rec in OPTIONS:
        option_recommendation_to_cli_option(add_option, rec)

def decrypt(pdf_path, out_path, password):
    pdf = PdfFileReader(open(os.path.abspath(pdf_path), 'rb'))

    if pdf.decrypt(str(password)) == 0:
        raise DecryptionError(pdf_path)

    title = pdf.documentInfo.title if pdf.documentInfo.title else _('Unknown')
    author = pdf.documentInfo.author if pdf.documentInfo.author else _('Unknown')
    out_pdf = PdfFileWriter(title=title, author=author)

    for page in pdf.pages:
        out_pdf.addPage(page)
@@ -86,23 +84,23 @@ def main(args=sys.argv, name=''):
    log = Log()
    parser = option_parser(name)
    add_options(parser)

    opts, args = parser.parse_args(args)
    args = args[1:]

    if len(args) < 2:
        print 'Error: A PDF file and decryption password is required.\n'
        print_help(parser, log)
        return 1

    if not is_valid_pdf(args[0]):
        print 'Error: Could not read file `%s`.' % args[0]
        return 1

    if not is_encrypted(args[0]):
        print 'Error: file `%s` is not encrypted.' % args[0]
        return 1

    try:
        decrypt(args[0], opts.output, args[1])
    except DecryptionError, e:
@@ -17,6 +17,8 @@ from calibre.utils.logging import Log
from calibre.constants import preferred_encoding
from calibre.customize.conversion import OptionRecommendation
from calibre.ebooks.pdf.verify import is_valid_pdf, is_encrypted
+from calibre.ebooks.metadata import authors_to_string
+from calibre.ebooks.metadata.meta import metadata_from_formats

from pyPdf import PdfFileWriter, PdfFileReader
@@ -52,7 +54,7 @@ def add_options(parser):
    group = OptionGroup(parser, _('Encrypt Options:'), _('Options to control the transformation of pdf'))
    parser.add_option_group(group)
    add_option = group.add_option

    for rec in OPTIONS:
        option_recommendation_to_cli_option(add_option, rec)
@@ -78,23 +80,23 @@ def main(args=sys.argv, name=''):
    log = Log()
    parser = option_parser(name)
    add_options(parser)

    opts, args = parser.parse_args(args)
    args = args[1:]

    if len(args) < 2:
        print 'Error: A PDF file and decryption password is required.\n'
        print_help(parser, log)
        return 1

    if not is_valid_pdf(args[0]):
        print 'Error: Could not read file `%s`.' % args[0]
        return 1

    if is_encrypted(args[0]):
        print 'Error: file `%s` is already encrypted.' % args[0]
        return 1

    mi = metadata_from_formats([args[0]])

    encrypt(args[0], opts.output, args[1], mi)
@@ -11,25 +11,25 @@ Verify PDF files.

import os

-from pyPdf import PdfFileWriter, PdfFileReader
+from pyPdf import PdfFileReader

def is_valid_pdf(pdf_path):
    '''
    Returns True if the pdf file is valid.
    '''

    try:
        with open(os.path.abspath(pdf_path), 'rb') as pdf_file:
            pdf = PdfFileReader(pdf_file)
    except:
        return False
    return True

def is_valid_pdfs(pdf_paths):
    '''
    Returns a list of invalid pdf files.
    '''

    invalid = []
    for pdf_path in pdf_paths:
        if not is_valid_pdf(pdf_path):
@@ -4,7 +4,6 @@ __license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'

-import os
import struct
import zlib
@@ -15,7 +15,7 @@
#                                                                       #
#                                                                       #
#########################################################################
-import sys, os, shutil
+import os, shutil

class Copy:
    """Copy each changed file to a directory for debugging purposes"""
@@ -66,6 +66,6 @@ class Copy:
        """
        write_file = os.path.join(Copy.__dir,new_file)
        shutil.copyfile(file, write_file)

    def rename(self, source, dest):
        shutil.copyfile(source, dest)
@@ -1,5 +1,4 @@
-import sys
from calibre.ebooks import rtf2xml
class ParseOptions:
    """
    Requires:
@@ -16,7 +16,6 @@
#                                                                       #
#########################################################################
import sys, os, codecs
from calibre.ebooks import rtf2xml
class Output:
    """
    Output file
@@ -15,8 +15,6 @@
#                                                                       #
#                                                                       #
#########################################################################
-import sys,os
from calibre.ebooks import rtf2xml
class OverrideTable:
    """
    Parse a line of text to make the override table. Return a string
@@ -7,21 +7,19 @@ from calibre.gui2 import file_icon_provider
from calibre.gui2.dialogs.choose_format_ui import Ui_ChooseFormatDialog

class ChooseFormatDialog(QDialog, Ui_ChooseFormatDialog):

    def __init__(self, window, msg, formats):
        QDialog.__init__(self, window)
        Ui_ChooseFormatDialog.__init__(self)
        self.setupUi(self)
        self.connect(self.formats, SIGNAL('activated(QModelIndex)'), lambda i: self.accept())

        self.msg.setText(msg)
        for format in formats:
            self.formats.addItem(QListWidgetItem(file_icon_provider().icon_from_ext(format.lower()),
                                                 format.upper()))
        self._formats = formats
        self.formats.setCurrentRow(0)

    def format(self):
        return self._formats[self.formats.currentRow()]
@@ -5,7 +5,7 @@ from PyQt4.QtGui import QDialog
from calibre.gui2.dialogs.conversion_error_ui import Ui_ConversionErrorDialog

class ConversionErrorDialog(QDialog, Ui_ConversionErrorDialog):

    def __init__(self, window, title, html, show=False):
        QDialog.__init__(self, window)
        Ui_ConversionErrorDialog.__init__(self)
@@ -14,7 +14,7 @@ class ConversionErrorDialog(QDialog, Ui_ConversionErrorDialog):
        self.set_message(html)
        if show:
            self.show()

    def set_message(self, html):
        self.text.setHtml('<html><body>%s</body></html'%(html,))
@@ -5,20 +5,20 @@ from PyQt4.QtGui import QGraphicsView
from PyQt4.QtCore import QSize

class BookView(QGraphicsView):

    MINIMUM_SIZE = QSize(400, 500)

    def __init__(self, *args):
        QGraphicsView.__init__(self, *args)
        self.preferred_size = self.MINIMUM_SIZE

    def minimumSizeHint(self):
        return self.MINIMUM_SIZE

    def sizeHint(self):
        return self.preferred_size

    def resize_for(self, width, height):
        self.preferred_size = QSize(width, height)
@@ -8,7 +8,7 @@ __docformat__ = 'restructuredtext en'
import os, math, re
from PyQt4.Qt import QWidget, QSize, QSizePolicy, QUrl, SIGNAL, Qt, QTimer, \
                     QPainter, QPalette, QBrush, QFontDatabase, QDialog, \
-                    QByteArray, QColor, QWheelEvent, QPoint, QImage, QRegion, \
+                    QByteArray, QColor, QPoint, QImage, QRegion, \
                     QFont, QObject, QApplication, pyqtSignature
from PyQt4.QtWebKit import QWebPage, QWebView, QWebSettings
@@ -4,17 +4,14 @@ __license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'

-import os, sys, traceback, urlparse
+import os, sys, urlparse

from BeautifulSoup import BeautifulSoup, Tag

from calibre.ebooks.oeb.iterator import EbookIterator
from calibre.ptempfile import TemporaryDirectory

-from PyQt4 import QtCore
from PyQt4.Qt import QUrl, QEventLoop, SIGNAL, QObject, QApplication, Qt, \
    QPrinter, QPrintPreviewDialog, QPrintDialog, QDialog, QMetaObject, Q_ARG
+from PyQt4 import QtCore
from PyQt4.QtWebKit import QWebView

PRINTCSS = 'body{width:100%;margin:0;padding:0;font-family:Arial;color:#000;background:none;font-size:12pt;text-align:left;}h1,h2,h3,h4,h5,h6{font-family:Helvetica;}h1{font-size:19pt;}h2{font-size:17pt;}h3{font-size:15pt;}h4,h5,h6{font-size:12pt;}pre,code,samp{font:10ptCourier,monospace;white-space:pre-wrap;page-break-inside:avoid;}blockquote{margin:1.3em;padding:1em;font-size:10pt;}hr{background-color:#ccc;}aimg{border:none;}a:link,a:visited{background:transparent;font-weight:700;text-decoration:underline;color:#333;}a:link:after,a{color:#000;}table{margin:1px;text-align:left;}th{border-bottom:1pxsolid#333;font-weight:bold;}td{border-bottom:1pxsolid#333;}th,td{padding:4px10px4px0;}tfoot{font-style:italic;}caption{background:#fff;margin-bottom:2em;text-align:left;}thead{display:table-header-group;}tr{page-break-inside:avoid;}#header,.header,#footer,.footer,#navbar,.navbar,#navigation,.navigation,#rightSideBar,.rightSideBar,#leftSideBar,.leftSideBar{display:none;}'
@@ -31,18 +28,18 @@ class Printing(QObject):
            self.connect(self.view, SIGNAL('loadFinished(bool)'), self.print_preview)
        else:
            self.connect(self.view, SIGNAL('loadFinished(bool)'), self.print_book)

        self.process_content(spine)

    def process_content(self, spine):
        content = ''

        for path in spine:
            raw = self.raw_content(path)
            content += self.parsed_content(raw, path)

        refined_content = self.refine_content(content)

        base = os.path.splitdrive(spine[0])[0]
        base = base if base != '' else '/'
@@ -52,7 +49,7 @@ class Printing(QObject):
    @QtCore.pyqtSignature('load_content(QString, QString)')
    def load_content(self, content, base):
        self.view.setHtml(content, QUrl(base))

    def raw_content(self, path):
        return open(path, 'rb').read().decode(path.encoding)
@@ -64,11 +61,11 @@ class Printing(QObject):
        styles = dom_tree.findAll('style')
        for s in styles:
            s.extract()

        scripts = dom_tree.findAll('script')
        for s in scripts:
            s.extract()

        # Convert all relative links to absolute paths.
        links = dom_tree.findAll(src=True)
        for s in links:
@@ -85,40 +82,40 @@ class Printing(QObject):
    # Adds the print css.
    def refine_content(self, content):
        dom_tree = BeautifulSoup('<html><head></head><body>%s</body></html>' % content)

        css = dom_tree.findAll('link')
        for c in css:
            c.extract()

        print_css = Tag(BeautifulSoup(), 'style', [('type', 'text/css'), ('title', 'override_css')])
        print_css.insert(0, PRINTCSS)
        dom_tree.findAll('head')[0].insert(0, print_css)

        return unicode(dom_tree)

    def print_preview(self, ok):
        printer = QPrinter(QPrinter.HighResolution)
        printer.setPageMargins(1, 1, 1, 1, QPrinter.Inch)

        previewDialog = QPrintPreviewDialog(printer)

        self.connect(previewDialog, SIGNAL('paintRequested(QPrinter *)'), self.view.print_)
        previewDialog.exec_()
        self.disconnect(previewDialog, SIGNAL('paintRequested(QPrinter *)'), self.view.print_)

        self.loop.quit()

    def print_book(self, ok):
        printer = QPrinter(QPrinter.HighResolution)
        printer.setPageMargins(1, 1, 1, 1, QPrinter.Inch)

        printDialog = QPrintDialog(printer)
        printDialog.setWindowTitle(_("Print eBook"))

        printDialog.exec_()
        if printDialog.result() == QDialog.Accepted:
            self.view.print_(printer)

        self.loop.quit()

def main():
@@ -18,7 +18,7 @@ sys.path.append(os.path.abspath('../../../'))
sys.path.append(os.path.abspath('.'))
from calibre import __appname__, __version__
import custom
+custom
# General configuration
# ---------------------
@ -1,970 +0,0 @@
|
||||
""" path.py - An object representing a path to a file or directory.
|
||||
|
||||
Example:
|
||||
|
||||
from path import path
|
||||
d = path('/home/guido/bin')
|
||||
for f in d.files('*.py'):
|
||||
f.chmod(0755)
|
||||
|
||||
This module requires Python 2.2 or later.
|
||||
|
||||
|
||||
URL: http://www.jorendorff.com/articles/python/path
|
||||
Author: Jason Orendorff <jason.orendorff\x40gmail\x2ecom> (and others - see the url!)
|
||||
Date: 9 Mar 2007
|
||||
"""
|
||||
|
||||
|
||||
# TODO
|
||||
# - Tree-walking functions don't avoid symlink loops. Matt Harrison
|
||||
# sent me a patch for this.
|
||||
# - Bug in write_text(). It doesn't support Universal newline mode.
|
||||
# - Better error message in listdir() when self isn't a
|
||||
# directory. (On Windows, the error message really sucks.)
|
||||
# - Make sure everything has a good docstring.
|
||||
# - Add methods for regex find and replace.
|
||||
# - guess_content_type() method?
|
||||
# - Perhaps support arguments to touch().
|
||||
|
||||
from __future__ import generators
|
||||
|
||||
import sys, warnings, os, fnmatch, glob, shutil, codecs, hashlib
|
||||
|
||||
__version__ = '2.2'
|
||||
__all__ = ['path']
|
||||
|
||||
# Platform-specific support for path.owner
|
||||
if os.name == 'nt':
|
||||
try:
|
||||
import win32security
|
||||
except ImportError:
|
||||
win32security = None
|
||||
else:
|
||||
try:
|
||||
import pwd
|
||||
except ImportError:
|
||||
pwd = None
|
||||
|
||||
# Pre-2.3 support. Are unicode filenames supported?
|
||||
_base = str
|
||||
_getcwd = os.getcwd
|
||||
try:
|
||||
if os.path.supports_unicode_filenames:
|
||||
_base = unicode
|
||||
_getcwd = os.getcwdu
|
||||
except AttributeError:
|
||||
pass
|
||||
|
||||
# Pre-2.3 workaround for booleans
|
||||
try:
|
||||
True, False
|
||||
except NameError:
|
||||
True, False = 1, 0
|
||||
|
||||
# Pre-2.3 workaround for basestring.
|
||||
try:
|
||||
basestring
|
||||
except NameError:
|
||||
basestring = (str, unicode)
|
||||
|
||||
# Universal newline support
|
||||
_textmode = 'r'
|
||||
if hasattr(file, 'newlines'):
|
||||
_textmode = 'U'
|
||||
|
||||
|
||||
class TreeWalkWarning(Warning):
    pass

class path(_base):
    """ Represents a filesystem path.

    For documentation on individual methods, consult their
    counterparts in os.path.
    """

    # --- Special Python methods.

    def __repr__(self):
        return 'path(%s)' % _base.__repr__(self)

    # Adding a path and a string yields a path.
    def __add__(self, more):
        try:
            resultStr = _base.__add__(self, more)
        except TypeError:  # Python bug
            resultStr = NotImplemented
        if resultStr is NotImplemented:
            return resultStr
        return self.__class__(resultStr)

    def __radd__(self, other):
        if isinstance(other, basestring):
            return self.__class__(other.__add__(self))
        else:
            return NotImplemented

    # The / operator joins paths.
    def __div__(self, rel):
        """ fp.__div__(rel) == fp / rel == fp.joinpath(rel)

        Join two path components, adding a separator character if
        needed.
        """
        return self.__class__(os.path.join(self, rel))

    # Make the / operator work even when true division is enabled.
    __truediv__ = __div__

    def getcwd(cls):
        """ Return the current working directory as a path object. """
        return cls(_getcwd())
    getcwd = classmethod(getcwd)
    # --- Operations on path strings.

    isabs = os.path.isabs
    def abspath(self): return self.__class__(os.path.abspath(self))
    def normcase(self): return self.__class__(os.path.normcase(self))
    def normpath(self): return self.__class__(os.path.normpath(self))
    def realpath(self): return self.__class__(os.path.realpath(self))
    def expanduser(self): return self.__class__(os.path.expanduser(self))
    def expandvars(self): return self.__class__(os.path.expandvars(self))
    def dirname(self): return self.__class__(os.path.dirname(self))
    basename = os.path.basename

    def expand(self):
        """ Clean up a filename by calling expandvars(),
        expanduser(), and normpath() on it.

        This is commonly everything needed to clean up a filename
        read from a configuration file, for example.
        """
        return self.expandvars().expanduser().normpath()

    def _get_namebase(self):
        base, ext = os.path.splitext(self.name)
        return base

    def _get_ext(self):
        f, ext = os.path.splitext(_base(self))
        return ext

    def _get_drive(self):
        drive, r = os.path.splitdrive(self)
        return self.__class__(drive)

    parent = property(
        dirname, None, None,
        """ This path's parent directory, as a new path object.

        For example, path('/usr/local/lib/libpython.so').parent == path('/usr/local/lib')
        """)

    name = property(
        basename, None, None,
        """ The name of this file or directory without the full path.

        For example, path('/usr/local/lib/libpython.so').name == 'libpython.so'
        """)

    namebase = property(
        _get_namebase, None, None,
        """ The same as path.name, but with one file extension stripped off.

        For example, path('/home/guido/python.tar.gz').name == 'python.tar.gz',
        but path('/home/guido/python.tar.gz').namebase == 'python.tar'
        """)

    ext = property(
        _get_ext, None, None,
        """ The file extension, for example '.py'. """)

    drive = property(
        _get_drive, None, None,
        """ The drive specifier, for example 'C:'.
        This is always empty on systems that don't use drive specifiers.
        """)

    def splitpath(self):
        """ p.splitpath() -> Return (p.parent, p.name). """
        parent, child = os.path.split(self)
        return self.__class__(parent), child

    def splitdrive(self):
        """ p.splitdrive() -> Return (p.drive, <the rest of p>).

        Split the drive specifier from this path.  If there is
        no drive specifier, p.drive is empty, so the return value
        is simply (path(''), p).  This is always the case on Unix.
        """
        drive, rel = os.path.splitdrive(self)
        return self.__class__(drive), rel

    def splitext(self):
        """ p.splitext() -> Return (p.stripext(), p.ext).

        Split the filename extension from this path and return
        the two parts.  Either part may be empty.

        The extension is everything from '.' to the end of the
        last path segment.  This has the property that if
        (a, b) == p.splitext(), then a + b == p.
        """
        filename, ext = os.path.splitext(self)
        return self.__class__(filename), ext

    def stripext(self):
        """ p.stripext() -> Remove one file extension from the path.

        For example, path('/home/guido/python.tar.gz').stripext()
        returns path('/home/guido/python.tar').
        """
        return self.splitext()[0]

    if hasattr(os.path, 'splitunc'):
        def splitunc(self):
            unc, rest = os.path.splitunc(self)
            return self.__class__(unc), rest

        def _get_uncshare(self):
            unc, r = os.path.splitunc(self)
            return self.__class__(unc)

        uncshare = property(
            _get_uncshare, None, None,
            """ The UNC mount point for this path.
            This is empty for paths on local drives. """)

    def joinpath(self, *args):
        """ Join two or more path components, adding a separator
        character (os.sep) if needed.  Returns a new path
        object.
        """
        return self.__class__(os.path.join(self, *args))

    def splitall(self):
        r""" Return a list of the path components in this path.

        The first item in the list will be a path.  Its value will be
        either os.curdir, os.pardir, empty, or the root directory of
        this path (for example, '/' or 'C:\\').  The other items in
        the list will be strings.

        path.path.joinpath(*result) will yield the original path.
        """
        parts = []
        loc = self
        while loc != os.curdir and loc != os.pardir:
            prev = loc
            loc, child = prev.splitpath()
            if loc == prev:
                break
            parts.append(child)
        parts.append(loc)
        parts.reverse()
        return parts

    def relpath(self):
        """ Return this path as a relative path,
        based from the current working directory.
        """
        cwd = self.__class__(os.getcwd())
        return cwd.relpathto(self)

    def relpathto(self, dest):
        """ Return a relative path from self to dest.

        If there is no relative path from self to dest, for example if
        they reside on different drives in Windows, then this returns
        dest.abspath().
        """
        origin = self.abspath()
        dest = self.__class__(dest).abspath()

        orig_list = origin.normcase().splitall()
        # Don't normcase dest!  We want to preserve the case.
        dest_list = dest.splitall()

        if orig_list[0] != os.path.normcase(dest_list[0]):
            # Can't get here from there.
            return dest

        # Find the location where the two paths start to differ.
        i = 0
        for start_seg, dest_seg in zip(orig_list, dest_list):
            if start_seg != os.path.normcase(dest_seg):
                break
            i += 1

        # Now i is the point where the two paths diverge.
        # Need a certain number of "os.pardir"s to work up
        # from the origin to the point of divergence.
        segments = [os.pardir] * (len(orig_list) - i)
        # Need to add the diverging part of dest_list.
        segments += dest_list[i:]
        if len(segments) == 0:
            # If they happen to be identical, use os.curdir.
            relpath = os.curdir
        else:
            relpath = os.path.join(*segments)
        return self.__class__(relpath)
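relpathto() above walks both paths to the first diverging segment, emits one os.pardir per remaining origin segment, then appends the rest of dest. A short worked sketch (paths invented):

    from path import path

    src = path('/home/guido/bin')
    # common prefix is /home/guido; one '..' climbs out of bin
    print src.relpathto('/home/guido/docs/notes.txt')   # ../docs/notes.txt
    # identical paths collapse to os.curdir
    print src.relpathto('/home/guido/bin')              # .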
    # --- Listing, searching, walking, and matching

    def listdir(self, pattern=None):
        """ D.listdir() -> List of items in this directory.

        Use D.files() or D.dirs() instead if you want a listing
        of just files or just subdirectories.

        The elements of the list are path objects.

        With the optional 'pattern' argument, this only lists
        items whose names match the given pattern.
        """
        names = os.listdir(self)
        if pattern is not None:
            names = fnmatch.filter(names, pattern)
        return [self / child for child in names]

    def dirs(self, pattern=None):
        """ D.dirs() -> List of this directory's subdirectories.

        The elements of the list are path objects.
        This does not walk recursively into subdirectories
        (but see path.walkdirs).

        With the optional 'pattern' argument, this only lists
        directories whose names match the given pattern.  For
        example, d.dirs('build-*').
        """
        return [p for p in self.listdir(pattern) if p.isdir()]

    def files(self, pattern=None):
        """ D.files() -> List of the files in this directory.

        The elements of the list are path objects.
        This does not walk into subdirectories (see path.walkfiles).

        With the optional 'pattern' argument, this only lists files
        whose names match the given pattern.  For example,
        d.files('*.pyc').
        """
        return [p for p in self.listdir(pattern) if p.isfile()]

    def walk(self, pattern=None, errors='strict'):
        """ D.walk() -> iterator over files and subdirs, recursively.

        The iterator yields path objects naming each child item of
        this directory and its descendants.  This requires that
        D.isdir().

        This performs a depth-first traversal of the directory tree.
        Each directory is returned just before all its children.

        The errors= keyword argument controls behavior when an
        error occurs.  The default is 'strict', which causes an
        exception.  The other allowed values are 'warn', which
        reports the error via warnings.warn(), and 'ignore'.
        """
        if errors not in ('strict', 'warn', 'ignore'):
            raise ValueError("invalid errors parameter")

        try:
            childList = self.listdir()
        except Exception:
            if errors == 'ignore':
                return
            elif errors == 'warn':
                warnings.warn(
                    "Unable to list directory '%s': %s"
                    % (self, sys.exc_info()[1]),
                    TreeWalkWarning)
                return
            else:
                raise

        for child in childList:
            if pattern is None or child.fnmatch(pattern):
                yield child
            try:
                isdir = child.isdir()
            except Exception:
                if errors == 'ignore':
                    isdir = False
                elif errors == 'warn':
                    warnings.warn(
                        "Unable to access '%s': %s"
                        % (child, sys.exc_info()[1]),
                        TreeWalkWarning)
                    isdir = False
                else:
                    raise

            if isdir:
                for item in child.walk(pattern, errors):
                    yield item

    def walkdirs(self, pattern=None, errors='strict'):
        """ D.walkdirs() -> iterator over subdirs, recursively.

        With the optional 'pattern' argument, this yields only
        directories whose names match the given pattern.  For
        example, mydir.walkdirs('*test') yields only directories
        with names ending in 'test'.

        The errors= keyword argument controls behavior when an
        error occurs.  The default is 'strict', which causes an
        exception.  The other allowed values are 'warn', which
        reports the error via warnings.warn(), and 'ignore'.
        """
        if errors not in ('strict', 'warn', 'ignore'):
            raise ValueError("invalid errors parameter")

        try:
            dirs = self.dirs()
        except Exception:
            if errors == 'ignore':
                return
            elif errors == 'warn':
                warnings.warn(
                    "Unable to list directory '%s': %s"
                    % (self, sys.exc_info()[1]),
                    TreeWalkWarning)
                return
            else:
                raise

        for child in dirs:
            if pattern is None or child.fnmatch(pattern):
                yield child
            for subsubdir in child.walkdirs(pattern, errors):
                yield subsubdir

    def walkfiles(self, pattern=None, errors='strict'):
        """ D.walkfiles() -> iterator over files in D, recursively.

        The optional argument, pattern, limits the results to files
        with names that match the pattern.  For example,
        mydir.walkfiles('*.tmp') yields only files with the .tmp
        extension.
        """
        if errors not in ('strict', 'warn', 'ignore'):
            raise ValueError("invalid errors parameter")

        try:
            childList = self.listdir()
        except Exception:
            if errors == 'ignore':
                return
            elif errors == 'warn':
                warnings.warn(
                    "Unable to list directory '%s': %s"
                    % (self, sys.exc_info()[1]),
                    TreeWalkWarning)
                return
            else:
                raise

        for child in childList:
            try:
                isfile = child.isfile()
                isdir = not isfile and child.isdir()
            except Exception:
                if errors == 'ignore':
                    continue
                elif errors == 'warn':
                    warnings.warn(
                        "Unable to access '%s': %s"
                        % (self, sys.exc_info()[1]),
                        TreeWalkWarning)
                    continue
                else:
                    raise

            if isfile:
                if pattern is None or child.fnmatch(pattern):
                    yield child
            elif isdir:
                for f in child.walkfiles(pattern, errors):
                    yield f

    def fnmatch(self, pattern):
        """ Return True if self.name matches the given pattern.

        pattern - A filename pattern with wildcards,
            for example '*.py'.
        """
        return fnmatch.fnmatch(self.name, pattern)

    def glob(self, pattern):
        """ Return a list of path objects that match the pattern.

        pattern - a path relative to this directory, with wildcards.

        For example, path('/users').glob('*/bin/*') returns a list
        of all the files users have in their bin directories.
        """
        cls = self.__class__
        return [cls(s) for s in glob.glob(_base(self / pattern))]
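All three walkers route OS errors through the single errors= switch, so one keyword decides between raising, warning, and silently skipping. A hedged usage sketch (the directory tree is hypothetical; warnings.catch_warnings needs Python 2.6 or later):

    import warnings
    from path import path

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter('always')
        # 'warn' keeps the traversal going past unreadable directories
        sources = list(path('/srv/project').walkfiles('*.py', errors='warn'))
    for w in caught:
        print 'skipped:', w.message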
    # --- Reading or writing an entire file at once.

    def open(self, mode='r'):
        """ Open this file.  Return a file object. """
        return file(self, mode)

    def bytes(self):
        """ Open this file, read all bytes, return them as a string. """
        f = self.open('rb')
        try:
            return f.read()
        finally:
            f.close()

    def write_bytes(self, bytes, append=False):
        """ Open this file and write the given bytes to it.

        Default behavior is to overwrite any existing file.
        Call p.write_bytes(bytes, append=True) to append instead.
        """
        if append:
            mode = 'ab'
        else:
            mode = 'wb'
        f = self.open(mode)
        try:
            f.write(bytes)
        finally:
            f.close()

    def text(self, encoding=None, errors='strict'):
        r""" Open this file, read it in, return the content as a string.

        This uses 'U' mode in Python 2.3 and later, so '\r\n' and '\r'
        are automatically translated to '\n'.

        Optional arguments:

        encoding - The Unicode encoding (or character set) of
            the file.  If present, the content of the file is
            decoded and returned as a unicode object; otherwise
            it is returned as an 8-bit str.
        errors - How to handle Unicode errors; see help(str.decode)
            for the options.  Default is 'strict'.
        """
        if encoding is None:
            # 8-bit
            f = self.open(_textmode)
            try:
                return f.read()
            finally:
                f.close()
        else:
            # Unicode
            f = codecs.open(self, 'r', encoding, errors)
            # (Note - Can't use 'U' mode here, since codecs.open
            # doesn't support 'U' mode, even in Python 2.3.)
            try:
                t = f.read()
            finally:
                f.close()
            return (t.replace(u'\r\n', u'\n')
                     .replace(u'\r\x85', u'\n')
                     .replace(u'\r', u'\n')
                     .replace(u'\x85', u'\n')
                     .replace(u'\u2028', u'\n'))

    def write_text(self, text, encoding=None, errors='strict', linesep=os.linesep, append=False):
        r""" Write the given text to this file.

        The default behavior is to overwrite any existing file;
        to append instead, use the 'append=True' keyword argument.

        There are two differences between path.write_text() and
        path.write_bytes(): newline handling and Unicode handling.
        See below.

        Parameters:

          - text - str/unicode - The text to be written.

          - encoding - str - The Unicode encoding that will be used.
            This is ignored if 'text' isn't a Unicode string.

          - errors - str - How to handle Unicode encoding errors.
            Default is 'strict'.  See help(unicode.encode) for the
            options.  This is ignored if 'text' isn't a Unicode
            string.

          - linesep - keyword argument - str/unicode - The sequence of
            characters to be used to mark end-of-line.  The default is
            os.linesep.  You can also specify None; this means to
            leave all newlines as they are in 'text'.

          - append - keyword argument - bool - Specifies what to do if
            the file already exists (True: append to the end of it;
            False: overwrite it.)  The default is False.


        --- Newline handling.

        write_text() converts all standard end-of-line sequences
        ('\n', '\r', and '\r\n') to your platform's default end-of-line
        sequence (see os.linesep; on Windows, for example, the
        end-of-line marker is '\r\n').

        If you don't like your platform's default, you can override it
        using the 'linesep=' keyword argument.  If you specifically want
        write_text() to preserve the newlines as-is, use 'linesep=None'.

        This applies to Unicode text the same as to 8-bit text, except
        there are three additional standard Unicode end-of-line sequences:
        u'\x85', u'\r\x85', and u'\u2028'.

        (This is slightly different from when you open a file for
        writing with fopen(filename, "w") in C or file(filename, 'w')
        in Python.)


        --- Unicode

        If 'text' isn't Unicode, then apart from newline handling, the
        bytes are written verbatim to the file.  The 'encoding' and
        'errors' arguments are not used and must be omitted.

        If 'text' is Unicode, it is first converted to bytes using the
        specified 'encoding' (or the default encoding if 'encoding'
        isn't specified).  The 'errors' argument applies only to this
        conversion.

        """
        if isinstance(text, unicode):
            if linesep is not None:
                # Convert all standard end-of-line sequences to
                # ordinary newline characters.
                text = (text.replace(u'\r\n', u'\n')
                            .replace(u'\r\x85', u'\n')
                            .replace(u'\r', u'\n')
                            .replace(u'\x85', u'\n')
                            .replace(u'\u2028', u'\n'))
                text = text.replace(u'\n', linesep)
            if encoding is None:
                encoding = sys.getdefaultencoding()
            bytes = text.encode(encoding, errors)
        else:
            # It is an error to specify an encoding if 'text' is
            # an 8-bit string.
            assert encoding is None

            if linesep is not None:
                text = (text.replace('\r\n', '\n')
                            .replace('\r', '\n'))
                text = text.replace('\n', linesep)
            # with linesep=None the 8-bit text passes through unchanged
            bytes = text

        self.write_bytes(bytes, append)

    def lines(self, encoding=None, errors='strict', retain=True):
        r""" Open this file, read all lines, return them in a list.

        Optional arguments:
            encoding - The Unicode encoding (or character set) of
                the file.  The default is None, meaning the content
                of the file is read as 8-bit characters and returned
                as a list of (non-Unicode) str objects.
            errors - How to handle Unicode errors; see help(str.decode)
                for the options.  Default is 'strict'
            retain - If true, retain newline characters; but all newline
                character combinations ('\r', '\n', '\r\n') are
                translated to '\n'.  If false, newline characters are
                stripped off.  Default is True.

        This uses 'U' mode in Python 2.3 and later.
        """
        if encoding is None and retain:
            f = self.open(_textmode)
            try:
                return f.readlines()
            finally:
                f.close()
        else:
            return self.text(encoding, errors).splitlines(retain)

    def write_lines(self, lines, encoding=None, errors='strict',
                    linesep=os.linesep, append=False):
        r""" Write the given lines of text to this file.

        By default this overwrites any existing file at this path.

        This puts a platform-specific newline sequence on every line.
        See 'linesep' below.

        lines - A list of strings.

        encoding - A Unicode encoding to use.  This applies only if
            'lines' contains any Unicode strings.

        errors - How to handle errors in Unicode encoding.  This
            also applies only to Unicode strings.

        linesep - The desired line-ending.  This line-ending is
            applied to every line.  If a line already has any
            standard line ending ('\r', '\n', '\r\n', u'\x85',
            u'\r\x85', u'\u2028'), that will be stripped off and
            this will be used instead.  The default is os.linesep,
            which is platform-dependent ('\r\n' on Windows, '\n' on
            Unix, etc.)  Specify None to write the lines as-is,
            like file.writelines().

        Use the keyword argument append=True to append lines to the
        file.  The default is to overwrite the file.  Warning:
        When you use this with Unicode data, if the encoding of the
        existing data in the file is different from the encoding
        you specify with the encoding= parameter, the result is
        mixed-encoding data, which can really confuse someone trying
        to read the file later.
        """
        if append:
            mode = 'ab'
        else:
            mode = 'wb'
        f = self.open(mode)
        try:
            for line in lines:
                isUnicode = isinstance(line, unicode)
                if linesep is not None:
                    # Strip off any existing line-end and add the
                    # specified linesep string.
                    if isUnicode:
                        if line[-2:] in (u'\r\n', u'\x0d\x85'):
                            line = line[:-2]
                        elif line[-1:] in (u'\r', u'\n',
                                           u'\x85', u'\u2028'):
                            line = line[:-1]
                    else:
                        if line[-2:] == '\r\n':
                            line = line[:-2]
                        elif line[-1:] in ('\r', '\n'):
                            line = line[:-1]
                    line += linesep
                if isUnicode:
                    if encoding is None:
                        encoding = sys.getdefaultencoding()
                    line = line.encode(encoding, errors)
                f.write(line)
        finally:
            f.close()

    def read_md5(self):
        """ Calculate the md5 hash for this file.

        This reads through the entire file.
        """
        f = self.open('rb')
        try:
            m = hashlib.md5()
            while True:
                d = f.read(8192)
                if not d:
                    break
                m.update(d)
        finally:
            f.close()
        return m.digest()
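read_md5() above uses the standard chunked-digest idiom: feed the hash fixed-size blocks so a large file never has to fit in memory at once. The same loop with a different hashlib digest, as a standalone sketch (the file path is made up):

    import hashlib

    def read_sha1(filename, blocksize=8192):
        h = hashlib.sha1()
        f = open(filename, 'rb')
        try:
            while True:
                block = f.read(blocksize)
                if not block:
                    break
                h.update(block)
        finally:
            f.close()
        return h.hexdigest()

    print read_sha1('/tmp/archive.tar.gz')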
    # --- Methods for querying the filesystem.

    exists = os.path.exists
    isdir = os.path.isdir
    isfile = os.path.isfile
    islink = os.path.islink
    ismount = os.path.ismount

    if hasattr(os.path, 'samefile'):
        samefile = os.path.samefile

    getatime = os.path.getatime
    atime = property(
        getatime, None, None,
        """ Last access time of the file. """)

    getmtime = os.path.getmtime
    mtime = property(
        getmtime, None, None,
        """ Last-modified time of the file. """)

    if hasattr(os.path, 'getctime'):
        getctime = os.path.getctime
        ctime = property(
            getctime, None, None,
            """ Creation time of the file. """)

    getsize = os.path.getsize
    size = property(
        getsize, None, None,
        """ Size of the file, in bytes. """)

    if hasattr(os, 'access'):
        def access(self, mode):
            """ Return true if current user has access to this path.

            mode - One of the constants os.F_OK, os.R_OK, os.W_OK, os.X_OK
            """
            return os.access(self, mode)

    def stat(self):
        """ Perform a stat() system call on this path. """
        return os.stat(self)

    def lstat(self):
        """ Like path.stat(), but do not follow symbolic links. """
        return os.lstat(self)

    def get_owner(self):
        r""" Return the name of the owner of this file or directory.

        This follows symbolic links.

        On Windows, this returns a name of the form ur'DOMAIN\User Name'.
        On Windows, a group can own a file or directory.
        """
        if os.name == 'nt':
            if win32security is None:
                raise Exception("path.owner requires win32all to be installed")
            desc = win32security.GetFileSecurity(
                self, win32security.OWNER_SECURITY_INFORMATION)
            sid = desc.GetSecurityDescriptorOwner()
            account, domain, typecode = win32security.LookupAccountSid(None, sid)
            return domain + u'\\' + account
        else:
            if pwd is None:
                raise NotImplementedError("path.owner is not implemented on this platform.")
            st = self.stat()
            return pwd.getpwuid(st.st_uid).pw_name

    owner = property(
        get_owner, None, None,
        """ Name of the owner of this file or directory. """)

    if hasattr(os, 'statvfs'):
        def statvfs(self):
            """ Perform a statvfs() system call on this path. """
            return os.statvfs(self)

    if hasattr(os, 'pathconf'):
        def pathconf(self, name):
            return os.pathconf(self, name)
    # --- Modifying operations on files and directories

    def utime(self, times):
        """ Set the access and modified times of this file. """
        os.utime(self, times)

    def chmod(self, mode):
        os.chmod(self, mode)

    if hasattr(os, 'chown'):
        def chown(self, uid, gid):
            os.chown(self, uid, gid)

    def rename(self, new):
        os.rename(self, new)

    def renames(self, new):
        os.renames(self, new)


    # --- Create/delete operations on directories

    def mkdir(self, mode=0777):
        os.mkdir(self, mode)

    def makedirs(self, mode=0777):
        os.makedirs(self, mode)

    def rmdir(self):
        os.rmdir(self)

    def removedirs(self):
        os.removedirs(self)


    # --- Modifying operations on files

    def touch(self):
        """ Set the access/modified times of this file to the current time.
        Create the file if it does not exist.
        """
        fd = os.open(self, os.O_WRONLY | os.O_CREAT, 0666)
        os.close(fd)
        os.utime(self, None)

    def remove(self):
        os.remove(self)

    def unlink(self):
        os.unlink(self)


    # --- Links

    if hasattr(os, 'link'):
        def link(self, newpath):
            """ Create a hard link at 'newpath', pointing to this file. """
            os.link(self, newpath)

    if hasattr(os, 'symlink'):
        def symlink(self, newlink):
            """ Create a symbolic link at 'newlink', pointing here. """
            os.symlink(self, newlink)

    if hasattr(os, 'readlink'):
        def readlink(self):
            """ Return the path to which this symbolic link points.

            The result may be an absolute or a relative path.
            """
            return self.__class__(os.readlink(self))

        def readlinkabs(self):
            """ Return the path to which this symbolic link points.

            The result is always an absolute path.
            """
            p = self.readlink()
            if p.isabs():
                return p
            else:
                return (self.parent / p).abspath()
    # --- High-level functions from shutil

    copyfile = shutil.copyfile
    copymode = shutil.copymode
    copystat = shutil.copystat
    copy = shutil.copy
    copy2 = shutil.copy2
    copytree = shutil.copytree
    if hasattr(shutil, 'move'):
        move = shutil.move
    rmtree = shutil.rmtree


    # --- Special stuff from os

    if hasattr(os, 'chroot'):
        def chroot(self):
            os.chroot(self)

    if hasattr(os, 'startfile'):
        def startfile(self):
            os.startfile(self)
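Taken as a whole, the deleted class wraps os.path, os, and shutil behind one chainable string subclass. A short sketch of the style it enabled (names invented), for contrast with the plain os.path calls the rest of this commit switches to:

    from path import path

    build = path('/srv/project') / 'build'
    if not build.exists():
        build.makedirs()
    # copy2 comes straight from shutil via the class attributes above
    for lib in path('/srv/project/src').walkfiles('*.so'):
        lib.copy2(build / lib.name)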
@@ -1,121 +0,0 @@

import sys, glob, re

import mechanize

URL = 'http://translate.google.com/translate_t?text=%(text)s&langpair=en|%(lang)s&oe=UTF8'

def browser():
    opener = mechanize.Browser()
    opener.set_handle_refresh(True)
    opener.set_handle_robots(False)
    opener.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; i686 Linux; en_US; rv:1.8.0.4) Gecko/20060508 Firefox/1.5.0.4')]
    return opener


class PoFile(object):

    SANITIZE = re.compile(r'&|<[^<>]+>|\%')
    STRING = re.compile(r'"(.*)"')

    def __init__(self, po_file):
        self.po_file = open(po_file, 'r+b')
        self.browser = browser()
        self.entries = []
        self.read()

    def sanitize_line(self, line):
        # sub() needs a replacement string before the target text
        return self.SANITIZE.sub('', line)

    def read(self):
        translated_lines = []
        self.po_file.seek(0)

        ID = 0
        STR = 1
        WHR = 2

        mode = None
        where, msgid, msgstr, fuzzy = [], [], [], False

        for line in self.po_file.readlines():
            prev_mode = mode
            if line.startswith('#:'):
                mode = WHR
            elif line.startswith('msgid'):
                mode = ID
            elif line.startswith('msgstr'):
                mode = STR
            elif line.startswith('#,'):
                fuzzy = True
                continue
            elif line.startswith('#') or not line.strip():
                mode = None

            if mode != prev_mode:
                if prev_mode == STR:
                    self.add_entry(where, fuzzy, msgid, msgstr)
                    where, msgid, msgstr, fuzzy = [], [], [], False

            if mode == WHR:
                where.append(line[2:].strip())
            elif mode == ID:
                msgid.append(self.get_string(line))
            elif mode == STR:
                msgstr.append(self.get_string(line))
            elif mode is None:
                self.add_line(line)

    def get_string(self, line):
        return self.STRING.search(line).group(1)

    def add_line(self, line):
        self.entries.append(line.strip())

    def add_entry(self, where, fuzzy, msgid, msgstr):
        self.entries.append(Entry(where, fuzzy, msgid, msgstr))

    def __str__(self):
        return '\n'.join([str(i) for i in self.entries]) + '\n'


class Entry(object):

    def __init__(self, where, fuzzy, msgid, msgstr, encoding='utf-8'):
        self.fuzzy = fuzzy
        self.where = [i.decode(encoding) for i in where]
        self.msgid = [i.decode(encoding) for i in msgid]
        self.msgstr = [i.decode(encoding) for i in msgstr]
        self.encoding = encoding

    def __str__(self):
        ans = []
        for line in self.where:
            ans.append('#: ' + line.encode(self.encoding))
        if self.fuzzy:
            ans.append('#, fuzzy')
        first = True
        for line in self.msgid:
            prefix = 'msgid ' if first else ''
            ans.append(prefix + '"%s"'%line.encode(self.encoding))
            first = False
        first = True
        for line in self.msgstr:
            prefix = 'msgstr ' if first else ''
            ans.append(prefix + '"%s"'%line.encode(self.encoding))
            first = False
        return '\n'.join(ans)


def main():
    po_files = glob.glob('*.po')
    for po_file in po_files:
        PoFile(po_file)
    pass

if __name__ == '__main__':
    pof = PoFile('de.po')
    open('/tmp/de.po', 'wb').write(str(pof))
    #sys.exit(main())
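The reader above is a small line-oriented state machine: each '#:', 'msgid', or 'msgstr' prefix switches the mode, and a mode change flushes the accumulated entry. A hedged round-trip sketch (catalog names invented):

    pof = PoFile('es.po')
    fuzzy = [e for e in pof.entries if isinstance(e, Entry) and e.fuzzy]
    print '%d fuzzy entries' % len(fuzzy)
    # str(pof) re-serializes entries and passthrough lines in order
    open('/tmp/es.po', 'wb').write(str(pof))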
@@ -15,7 +15,10 @@ def available_translations():
    global _available_translations
    if _available_translations is None:
        stats = P('localization/stats.pickle')
        stats = cPickle.load(open(stats, 'rb'))
        if os.path.exists(stats):
            stats = cPickle.load(open(stats, 'rb'))
        else:
            stats = {}
        _available_translations = [x for x in stats if stats[x] > 0.1]
    return _available_translations
@@ -85,7 +85,7 @@ __all__ = [
'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno',
'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
'indentedBlock', 'originalTextFor',
@@ -425,7 +425,7 @@ class ParseResults(object):
                self[k] = v
                if isinstance(v[0],ParseResults):
                    v[0].__parent = wkref(self)

        self.__toklist += other.__toklist
        self.__accumNames.update( other.__accumNames )
        del other
@@ -3231,12 +3231,12 @@ def originalTextFor(expr, asString=True):
    restore the parsed fields of an HTML start tag into the raw tag text itself, or to
    revert separate tokens with intervening whitespace back to the original matching
    input text. Simpler to use than the parse action keepOriginalText, and does not
    require the inspect module to chase up the call stack.  By default, returns a
    string containing the original parsed text.

    If the optional asString argument is passed as False, then the return value is a
    ParseResults containing any results names that were originally matched, and a
    single token containing the original matched text from the input string.  So if
    require the inspect module to chase up the call stack.  By default, returns a
    string containing the original parsed text.

    If the optional asString argument is passed as False, then the return value is a
    ParseResults containing any results names that were originally matched, and a
    single token containing the original matched text from the input string.  So if
    the expression passed to originalTextFor contains expressions with defined
    results names, you must set asString to False if you want to preserve those
    results name values."""
@@ -3252,7 +3252,7 @@ def originalTextFor(expr, asString=True):
        del t["_original_end"]
    matchExpr.setParseAction(extractText)
    return matchExpr

# convenience constants for positional expressions
empty       = Empty().setName("empty")
lineStart   = LineStart().setName("lineStart")
@@ -3532,7 +3532,7 @@ def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString):
                                ).setParseAction(lambda t:t[0].strip()))
    else:
        if ignoreExpr is not None:
            content = (Combine(OneOrMore(~ignoreExpr +
            content = (Combine(OneOrMore(~ignoreExpr +
                     ~Literal(opener) + ~Literal(closer) +
                     CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
                                ).setParseAction(lambda t:t[0].strip()))
@@ -20,6 +20,7 @@ class WriteXmlMixin:
    def to_xml(self, encoding = "iso-8859-1"):
        try:
            import cStringIO as StringIO
            StringIO
        except ImportError:
            import StringIO
        f = StringIO.StringIO()
@@ -64,7 +65,7 @@ def _format_date(dt):
            "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"][dt.month-1],
            dt.year, dt.hour, dt.minute, dt.second)


##
# A couple simple wrapper objects for the fields which
# take a simple value other than a string.
@@ -72,7 +73,7 @@ class IntElement:
    """implements the 'publish' API for integers

    Takes the tag name and the integer value to publish.

    (Could be used for anything which uses str() to be published
    to text for XML.)
    """
@@ -138,7 +139,7 @@ class Image:
        self.width = width
        self.height = height
        self.description = description

    def publish(self, handler):
        handler.startElement("image", self.element_attrs)

@@ -150,7 +151,7 @@ class Image:
        if isinstance(width, int):
            width = IntElement("width", width)
        _opt_element(handler, "width", width)

        height = self.height
        if isinstance(height, int):
            height = IntElement("height", height)
@@ -196,7 +197,7 @@ class TextInput:
        _element(handler, "name", self.name)
        _element(handler, "link", self.link)
        handler.endElement("textInput")


class Enclosure:
    """Publish an enclosure"""
@@ -255,7 +256,7 @@ class RSS2(WriteXmlMixin):
    Stores the channel attributes, with the "category" elements under
    ".categories" and the RSS items under ".items".
    """

    rss_attrs = {"version": "2.0"}
    element_attrs = {}
    def __init__(self,
@@ -269,7 +270,7 @@ class RSS2(WriteXmlMixin):
                 webMaster = None,
                 pubDate = None,  # a datetime, *in* *GMT*
                 lastBuildDate = None, # a datetime

                 categories = None, # list of strings or Category
                 generator = _generator_name,
                 docs = "http://blogs.law.harvard.edu/tech/rss",
@@ -294,7 +295,7 @@ class RSS2(WriteXmlMixin):
        self.webMaster = webMaster
        self.pubDate = pubDate
        self.lastBuildDate = lastBuildDate

        if categories is None:
            categories = []
        self.categories = categories
@@ -320,7 +321,7 @@ class RSS2(WriteXmlMixin):
        _element(handler, "description", self.description)

        self.publish_extensions(handler)

        _opt_element(handler, "language", self.language)
        _opt_element(handler, "copyright", self.copyright)
        _opt_element(handler, "managingEditor", self.managingEditor)
@@ -374,8 +375,8 @@ class RSS2(WriteXmlMixin):
    # output after the three required fields.
    pass



class RSSItem(WriteXmlMixin):
    """Publish an RSS Item"""
    element_attrs = {}
@@ -391,7 +392,7 @@ class RSSItem(WriteXmlMixin):
                 pubDate = None,  # a datetime
                 source = None,   # a Source
                 ):

        if title is None and description is None:
            raise TypeError(
                "must define at least one of 'title' or 'description'")
@@ -421,7 +422,7 @@ class RSSItem(WriteXmlMixin):
        if isinstance(category, basestring):
            category = Category(category)
        category.publish(handler)

        _opt_element(handler, "comments", self.comments)
        if self.enclosure is not None:
            self.enclosure.publish(handler)
@@ -434,7 +435,7 @@ class RSSItem(WriteXmlMixin):

        if self.source is not None:
            self.source.publish(handler)

        handler.endElement("item")

    def publish_extensions(self, handler):
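The bare StringIO expression added to to_xml() exists only to reference the conditionally imported name, so the PyFlakes-based check this commit introduces does not flag the import as unused. The same trick on a hypothetical module pair:

    try:
        import cPickle as pickle
        pickle  # touch the name so the checker counts the import as used
    except ImportError:
        import pickle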
@@ -57,13 +57,13 @@ recipe_modules = ['recipe_' + r for r in (
    'monitor', 'republika', 'beta', 'beta_en', 'glasjavnosti',
    'esquire', 'livemint', 'thedgesingapore', 'darknet', 'rga',
    'intelligencer', 'theoldfoodie', 'hln_be', 'honvedelem',
    'the_new_republic',
)]


import re, imp, inspect, time, os
from calibre.web.feeds.news import BasicNewsRecipe, CustomIndexRecipe, AutomaticNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.path import path
from calibre.ptempfile import PersistentTemporaryDirectory
from calibre import __appname__, english_sort

@@ -102,8 +102,8 @@ def compile_recipe(src):
    '''
    global _tdir, _crep
    if _tdir is None or not os.path.exists(_tdir):
        _tdir = path(PersistentTemporaryDirectory('_recipes'))
    temp = _tdir/('recipe%d.py'%_crep)
        _tdir = PersistentTemporaryDirectory('_recipes')
    temp = os.path.join(_tdir, 'recipe%d.py'%_crep)
    _crep += 1
    if not isinstance(src, unicode):
        match = re.search(r'coding[:=]\s*([-\w.]+)', src[:200])
@@ -118,8 +118,9 @@ def compile_recipe(src):
    src = src.replace('from libprs500', 'from calibre').encode('utf-8')
    f.write(src)
    f.close()
    module = imp.find_module(temp.namebase, [temp.dirname()])
    module = imp.load_module(temp.namebase, *module)
    module = imp.find_module(os.path.splitext(os.path.basename(temp))[0],
                             [os.path.dirname(temp)])
    module = imp.load_module(os.path.splitext(os.path.basename(temp))[0], *module)
    classes = inspect.getmembers(module,
        lambda x : inspect.isclass(x) and \
            issubclass(x, (BasicNewsRecipe,)) and \
@@ -148,6 +149,7 @@ _titles.sort(cmp=english_sort)
titles = _titles

def migrate_automatic_profile_to_automatic_recipe(profile):
    BeautifulSoup
    oprofile = profile
    profile = compile_recipe(profile)
    if 'BasicUserProfile' not in profile.__name__:
@@ -165,3 +167,4 @@ class BasicUserRecipe%d(AutomaticNewsRecipe):
    '''%(int(time.time()), repr(profile.title), profile.oldest_article,
         profile.max_articles_per_feed, profile.summary_length, repr(profile.feeds))
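compile_recipe() now spells out the imp-based load directly: write the recipe source to a temp file, then find_module/load_module on the file's stem. That step in isolation, as a hedged sketch (the path is hypothetical):

    import imp, os

    def load_py(filepath):
        name = os.path.splitext(os.path.basename(filepath))[0]
        # find_module returns (open file, pathname, description)
        found = imp.find_module(name, [os.path.dirname(filepath)])
        try:
            return imp.load_module(name, *found)
        finally:
            found[0].close()

    recipe_module = load_py('/tmp/recipe0.py')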
@@ -1,61 +1,61 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'

'''
24sata.hr
'''

import re
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag

class Cro24Sata(BasicNewsRecipe):
    title                 = '24 Sata - Hr'
    __author__            = 'Darko Miletic'
    description           = "News Portal from Croatia"
    publisher             = '24sata.hr'
    category              = 'news, politics, Croatia'
    oldest_article        = 2
    max_articles_per_feed = 100
    delay                 = 4
    no_stylesheets        = True
    encoding              = 'utf-8'
    use_embedded_content  = False
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'

'''
24sata.hr
'''

import re
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag

class Cro24Sata(BasicNewsRecipe):
    title                 = '24 Sata - Hr'
    __author__            = 'Darko Miletic'
    description           = "News Portal from Croatia"
    publisher             = '24sata.hr'
    category              = 'news, politics, Croatia'
    oldest_article        = 2
    max_articles_per_feed = 100
    delay                 = 4
    no_stylesheets        = True
    encoding              = 'utf-8'
    use_embedded_content  = False
    language              = 'hr'

    lang = 'hr-HR'

    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'

    conversion_options = {
                          'comment'      : description
                        , 'tags'         : category
                        , 'publisher'    : publisher
                        , 'language'     : lang
                        , 'pretty_print' : True
                        }

    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    remove_tags = [
                    dict(name=['object','link','embed'])
                   ,dict(name='table', attrs={'class':'enumbox'})
                  ]

    feeds = [(u'Najnovije Vijesti', u'http://www.24sata.hr/index.php?cmd=show_rss&action=novo')]

    def preprocess_html(self, soup):
        soup.html['lang'] = self.lang
        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
        mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
        soup.head.insert(0,mlang)
        soup.head.insert(1,mcharset)
        for item in soup.findAll(style=True):
            del item['style']
        return soup

    def print_version(self, url):
        return url + '&action=ispis'

    lang = 'hr-HR'

    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'

    conversion_options = {
                          'comment'      : description
                        , 'tags'         : category
                        , 'publisher'    : publisher
                        , 'language'     : lang
                        , 'pretty_print' : True
                        }

    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    remove_tags = [
                    dict(name=['object','link','embed'])
                   ,dict(name='table', attrs={'class':'enumbox'})
                  ]

    feeds = [(u'Najnovije Vijesti', u'http://www.24sata.hr/index.php?cmd=show_rss&action=novo')]

    def preprocess_html(self, soup):
        soup.html['lang'] = self.lang
        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
        mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
        soup.head.insert(0,mlang)
        soup.head.insert(1,mcharset)
        for item in soup.findAll(style=True):
            del item['style']
        return soup

    def print_version(self, url):
        return url + '&action=ispis'
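Every recipe touched by this commit injects explicit Content-Language and Content-Type meta tags during preprocess_html(), so later conversion stages never have to guess the charset. The shared pattern, factored out as a sketch (the helper name is invented):

    from calibre.ebooks.BeautifulSoup import Tag

    def add_meta(soup, lang, charset='utf-8'):
        mlang = Tag(soup, 'meta', [("http-equiv", "Content-Language"),
                                   ("content", lang)])
        mcharset = Tag(soup, 'meta', [("http-equiv", "Content-Type"),
                                      ("content", "text/html; charset=%s" % charset)])
        soup.head.insert(0, mlang)
        soup.head.insert(1, mcharset)
        return soup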
@@ -1,68 +1,68 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'

'''
24sata.rs
'''

import re
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag

class Ser24Sata(BasicNewsRecipe):
    title                 = '24 Sata - Sr'
    __author__            = 'Darko Miletic'
    description           = '24 sata portal vesti iz Srbije'
    publisher             = 'Ringier d.o.o.'
    category              = 'news, politics, entertainment, Serbia'
    oldest_article        = 7
    max_articles_per_feed = 100
    no_stylesheets        = True
    encoding              = 'utf-8'
    use_embedded_content  = False
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'

'''
24sata.rs
'''

import re
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag

class Ser24Sata(BasicNewsRecipe):
    title                 = '24 Sata - Sr'
    __author__            = 'Darko Miletic'
    description           = '24 sata portal vesti iz Srbije'
    publisher             = 'Ringier d.o.o.'
    category              = 'news, politics, entertainment, Serbia'
    oldest_article        = 7
    max_articles_per_feed = 100
    no_stylesheets        = True
    encoding              = 'utf-8'
    use_embedded_content  = False
    language              = 'sr'

    lang = 'sr-Latn-RS'
    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'

    conversion_options = {
                          'comment'      : description
                        , 'tags'         : category
                        , 'publisher'    : publisher
                        , 'language'     : lang
                        , 'pretty_print' : True
                        }

    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    feeds = [(u'Vesti Dana', u'http://www.24sata.rs/rss.php')]

    def preprocess_html(self, soup):
        soup.html['xml:lang'] = self.lang
        soup.html['lang'] = self.lang

        attribs = [ 'style','font','valign'
                   ,'colspan','width','height'
                   ,'rowspan','summary','align'
                   ,'cellspacing','cellpadding'
                   ,'frames','rules','border'
                  ]
        for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
            item.name = 'div'
            for attrib in attribs:
                if item.has_key(attrib):
                    del item[attrib]

        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
        mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
        soup.head.insert(0,mlang)
        soup.head.insert(1,mcharset)
        return self.adeify_images(soup)

    def print_version(self, url):
        article = url.partition('#')[0]
        article_id = article.partition('id=')[2]
        return 'http://www.24sata.rs/_print.php?id=' + article_id

    lang = 'sr-Latn-RS'
    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'

    conversion_options = {
                          'comment'      : description
                        , 'tags'         : category
                        , 'publisher'    : publisher
                        , 'language'     : lang
                        , 'pretty_print' : True
                        }

    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    feeds = [(u'Vesti Dana', u'http://www.24sata.rs/rss.php')]

    def preprocess_html(self, soup):
        soup.html['xml:lang'] = self.lang
        soup.html['lang'] = self.lang

        attribs = [ 'style','font','valign'
                   ,'colspan','width','height'
                   ,'rowspan','summary','align'
                   ,'cellspacing','cellpadding'
                   ,'frames','rules','border'
                  ]
        for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
            item.name = 'div'
            for attrib in attribs:
                if item.has_key(attrib):
                    del item[attrib]

        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
        mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
        soup.head.insert(0,mlang)
        soup.head.insert(1,mcharset)
        return self.adeify_images(soup)

    def print_version(self, url):
        article = url.partition('#')[0]
        article_id = article.partition('id=')[2]
        return 'http://www.24sata.rs/_print.php?id=' + article_id
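print_version() above turns an article URL into the site's print endpoint with two str.partition calls: drop the fragment, then keep whatever follows 'id='. Standalone, with an invented URL:

    url = 'http://www.24sata.rs/vesti.php?id=12345#komentari'
    article = url.partition('#')[0]            # strip the fragment
    article_id = article.partition('id=')[2]   # text after 'id='
    print 'http://www.24sata.rs/_print.php?id=' + article_id
    # -> http://www.24sata.rs/_print.php?id=12345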
@@ -1,72 +1,72 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
elargentino.com
'''

from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag

class SieteDias(BasicNewsRecipe):
    title                 = '7 dias'
    __author__            = 'Darko Miletic'
    description           = 'Revista Argentina'
    publisher             = 'ElArgentino.com'
    category              = 'news, politics, show, Argentina'
    oldest_article        = 7
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'utf-8'
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
elargentino.com
'''

from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag

class SieteDias(BasicNewsRecipe):
    title                 = '7 dias'
    __author__            = 'Darko Miletic'
    description           = 'Revista Argentina'
    publisher             = 'ElArgentino.com'
    category              = 'news, politics, show, Argentina'
    oldest_article        = 7
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'utf-8'
    language              = 'es'

    lang = 'es-AR'
    direction = 'ltr'
    INDEX = 'http://www.elargentino.com/medios/125/7-Dias.html'
    extra_css = ' .titulo{font-size: x-large; font-weight: bold} .volantaImp{font-size: small; font-weight: bold} '

    html2lrf_options = [
                          '--comment'  , description
                        , '--category' , category
                        , '--publisher', publisher
                       ]

    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} "'

    keep_only_tags = [dict(name='div', attrs={'class':'ContainerPop'})]

    remove_tags = [dict(name='link')]

    feeds = [(u'Articulos', u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=125&Content-Type=text/xml&ChannelDesc=7%20D%C3%ADas')]

    def print_version(self, url):
        main, sep, article_part = url.partition('/nota-')
        article_id, rsep, rrest = article_part.partition('-')
        return u'http://www.elargentino.com/Impresion.aspx?Id=' + article_id

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        soup.html['lang'] = self.lang
        soup.html['dir' ] = self.direction
        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
        mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
        soup.head.insert(0,mlang)
        soup.head.insert(1,mcharset)
        return soup

    def get_cover_url(self):
        cover_url = None
        soup = self.index_to_soup(self.INDEX)
        cover_item = soup.find('div',attrs={'class':'colder'})
        if cover_item:
            clean_url = self.image_url_processor(None,cover_item.div.img['src'])
            cover_url = 'http://www.elargentino.com' + clean_url + '&height=600'
        return cover_url

    def image_url_processor(self, baseurl, url):
        base, sep, rest = url.rpartition('?Id=')
        img, sep2, rrest = rest.partition('&')
        return base + sep + img

    lang = 'es-AR'
    direction = 'ltr'
    INDEX = 'http://www.elargentino.com/medios/125/7-Dias.html'
    extra_css = ' .titulo{font-size: x-large; font-weight: bold} .volantaImp{font-size: small; font-weight: bold} '

    html2lrf_options = [
                          '--comment'  , description
                        , '--category' , category
                        , '--publisher', publisher
                       ]

    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} "'

    keep_only_tags = [dict(name='div', attrs={'class':'ContainerPop'})]

    remove_tags = [dict(name='link')]

    feeds = [(u'Articulos', u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=125&Content-Type=text/xml&ChannelDesc=7%20D%C3%ADas')]

    def print_version(self, url):
        main, sep, article_part = url.partition('/nota-')
        article_id, rsep, rrest = article_part.partition('-')
        return u'http://www.elargentino.com/Impresion.aspx?Id=' + article_id

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        soup.html['lang'] = self.lang
        soup.html['dir' ] = self.direction
        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
        mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
        soup.head.insert(0,mlang)
        soup.head.insert(1,mcharset)
        return soup

    def get_cover_url(self):
        cover_url = None
        soup = self.index_to_soup(self.INDEX)
        cover_item = soup.find('div',attrs={'class':'colder'})
        if cover_item:
            clean_url = self.image_url_processor(None,cover_item.div.img['src'])
            cover_url = 'http://www.elargentino.com' + clean_url + '&height=600'
        return cover_url

    def image_url_processor(self, baseurl, url):
        base, sep, rest = url.rpartition('?Id=')
        img, sep2, rrest = rest.partition('&')
        return base + sep + img
@ -1,59 +1,59 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
'''
www.accountancyage.com
'''

from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag

class AccountancyAge(BasicNewsRecipe):
    title = 'Accountancy Age'
    __author__ = 'Darko Miletic'
    description = 'business news'
    publisher = 'accountancyage.com'
    category = 'news, politics, finances'
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    simultaneous_downloads = 1
    encoding = 'utf-8'
    lang = 'en'
    language = 'en'

    html2lrf_options = [
        '--comment', description
        , '--category', category
        , '--publisher', publisher
    ]

    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'

    keep_only_tags = [dict(name='div', attrs={'class':'bodycol'})]
    remove_tags = [dict(name=['embed','object'])]
    remove_tags_after = dict(name='div', attrs={'id':'permalink'})
    remove_tags_before = dict(name='div', attrs={'class':'gap6'})

    feeds = [(u'All News', u'http://feeds.accountancyage.com/rss/latest/accountancyage/all')]

    def print_version(self, url):
        rest, sep, miss = url.rpartition('/')
        rr, ssep, artid = rest.rpartition('/')
        return u'http://www.accountancyage.com/articles/print/' + artid

    def get_article_url(self, article):
        return article.get('guid', None)

    def preprocess_html(self, soup):
        soup.html['xml:lang'] = self.lang
        soup.html['lang'] = self.lang
        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
        mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
        soup.head.insert(0,mlang)
        soup.head.insert(1,mcharset)
        return self.adeify_images(soup)

@ -1,77 +1,77 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
www.adventuregamers.com
'''

from calibre.web.feeds.news import BasicNewsRecipe

class AdventureGamers(BasicNewsRecipe):
    title = u'Adventure Gamers'
    language = 'en'

    __author__ = 'Darko Miletic'
    description = 'Adventure games portal'
    publisher = 'Adventure Gamers'
    category = 'news, games, adventure, technology'
    oldest_article = 10
    delay = 10
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'cp1252'
    remove_javascript = True
    use_embedded_content = False
    INDEX = u'http://www.adventuregamers.com'

    html2lrf_options = [
        '--comment', description
        , '--category', category
        , '--publisher', publisher
    ]

    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'

    keep_only_tags = [
        dict(name='div', attrs={'class':'content_middle'})
    ]

    remove_tags = [
        dict(name=['object','link','embed','form'])
        ,dict(name='div', attrs={'class':['related-stories','article_leadout','prev','next','both']})
    ]

    remove_tags_after = [dict(name='div', attrs={'class':'toolbar_fat'})]

    feeds = [(u'Articles', u'http://feeds2.feedburner.com/AdventureGamers')]

    def get_article_url(self, article):
        return article.get('guid', None)

    def append_page(self, soup, appendtag, position):
        pager = soup.find('div',attrs={'class':'toolbar_fat_next'})
        if pager:
            nexturl = self.INDEX + pager.a['href']
            soup2 = self.index_to_soup(nexturl)
            texttag = soup2.find('div', attrs={'class':'bodytext'})
            for it in texttag.findAll(style=True):
                del it['style']
            newpos = len(texttag.contents)
            self.append_page(soup2,texttag,newpos)
            texttag.extract()
            appendtag.insert(position,texttag)

    def preprocess_html(self, soup):
        mtag = '<meta http-equiv="Content-Language" content="en-US"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>'
        soup.head.insert(0,mtag)
        for item in soup.findAll(style=True):
            del item['style']
        self.append_page(soup, soup.body, 3)
        pager = soup.find('div',attrs={'class':'toolbar_fat'})
        if pager:
            pager.extract()
        return soup

@ -1,62 +1,61 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
'''
ambito.com
'''

from calibre.web.feeds.news import BasicNewsRecipe

class Ambito(BasicNewsRecipe):
    title = 'Ambito.com'
    __author__ = 'Darko Miletic'
    description = 'Informacion Libre las 24 horas'
    publisher = 'Ambito.com'
    category = 'news, politics, Argentina'
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'iso-8859-1'
    cover_url = 'http://www.ambito.com/img/logo_.jpg'
    remove_javascript = True
    use_embedded_content = False

    html2lrf_options = [
        '--comment', description
        , '--category', category
        , '--publisher', publisher
    ]

    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'

    keep_only_tags = [dict(name='div', attrs={'align':'justify'})]

    remove_tags = [dict(name=['object','link'])]

    feeds = [
        (u'Principales Noticias', u'http://www.ambito.com/rss/noticiasp.asp' )
        ,(u'Economia' , u'http://www.ambito.com/rss/noticias.asp?S=Econom%EDa' )
        ,(u'Politica' , u'http://www.ambito.com/rss/noticias.asp?S=Pol%EDtica' )
        ,(u'Informacion General' , u'http://www.ambito.com/rss/noticias.asp?S=Informaci%F3n%20General')
        ,(u'Agro' , u'http://www.ambito.com/rss/noticias.asp?S=Agro' )
        ,(u'Internacionales' , u'http://www.ambito.com/rss/noticias.asp?S=Internacionales' )
        ,(u'Deportes' , u'http://www.ambito.com/rss/noticias.asp?S=Deportes' )
        ,(u'Espectaculos' , u'http://www.ambito.com/rss/noticias.asp?S=Espect%E1culos' )
        ,(u'Tecnologia' , u'http://www.ambito.com/rss/noticias.asp?S=Tecnologia' )
        ,(u'Salud' , u'http://www.ambito.com/rss/noticias.asp?S=Salud' )
        ,(u'Ambito Nacional' , u'http://www.ambito.com/rss/noticias.asp?S=Ambito%20Nacional' )
    ]

    def print_version(self, url):
        return url.replace('http://www.ambito.com/noticia.asp?','http://www.ambito.com/noticias/imprimir.asp?')

    def preprocess_html(self, soup):
        mtag = '<meta http-equiv="Content-Language" content="es-AR"/>'
        soup.head.insert(0,mtag)
        for item in soup.findAll(style=True):
            del item['style']
        return soup

    language = 'es'

@ -1,55 +1,55 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
spectator.org
'''

from calibre.web.feeds.news import BasicNewsRecipe

class TheAmericanSpectator(BasicNewsRecipe):
    title = 'The American Spectator'
    __author__ = 'Darko Miletic'
    language = 'en'

    description = 'News from USA'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    INDEX = 'http://spectator.org'

    html2lrf_options = [
        '--comment' , description
        , '--category' , 'news, politics, USA'
        , '--publisher' , title
    ]

    keep_only_tags = [
        dict(name='div', attrs={'class':'post inner'})
        ,dict(name='div', attrs={'class':'author-bio'})
    ]

    remove_tags = [
        dict(name='object')
        ,dict(name='div', attrs={'class':'col3' })
        ,dict(name='div', attrs={'class':'post-options' })
        ,dict(name='p' , attrs={'class':'letter-editor'})
        ,dict(name='div', attrs={'class':'social' })
    ]

    feeds = [ (u'Articles', u'http://feedproxy.google.com/amspecarticles')]

    def get_cover_url(self):
        cover_url = None
        soup = self.index_to_soup(self.INDEX)
        link_item = soup.find('a',attrs={'class':'cover'})
        if link_item:
            soup2 = self.index_to_soup(link_item['href'])
            link_item2 = soup2.find('div',attrs={'class':'post inner issues'})
            cover_url = self.INDEX + link_item2.img['src']
        return cover_url

    def print_version(self, url):
        return url + '/print'

@ -1,62 +1,62 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
axxon.com.ar
'''
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag

class Axxon_news(BasicNewsRecipe):
    title = 'Axxon noticias'
    __author__ = 'Darko Miletic'
    description = 'Axxon, Ciencia Ficcion en Bits'
    publisher = 'Axxon'
    category = 'news, SF, Argentina, science, movies'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = False
    use_embedded_content = False
    language = 'es'

    lang = 'es-AR'

    conversion_options = {
        'comment' : description
        , 'tags' : category
        , 'publisher' : publisher
        , 'language' : lang
        , 'pretty_print' : True
    }

    keep_only_tags = [dict(name='div', attrs={'class':'post'})]

    remove_tags = [dict(name=['object','link','iframe','embed'])]

    feeds = [(u'Noticias', u'http://axxon.com.ar/noticias/feed/')]

    remove_attributes = ['style','width','height','font','border','align']

    def adeify_images2(self, soup):
        # Strip layout attributes from images and add a line break after each one
        for item in soup.findAll('img'):
            for attrib in ['height','width','border','align','style']:
                if item.has_key(attrib):
                    del item[attrib]
            oldParent = item.parent
            if oldParent.name == 'a':
                oldParent.name = 'p'  # reparent the wrapping anchor as a paragraph (was '==', a no-op comparison)
            myIndex = oldParent.contents.index(item)
            brtag = Tag(soup,'br')
            oldParent.insert(myIndex+1,brtag)
        return soup

    def preprocess_html(self, soup):
        soup.html['xml:lang'] = self.lang
        soup.html['lang'] = self.lang
        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
        soup.html.insert(0,mlang)
        return self.adeify_images2(soup)

@ -1,65 +1,65 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
www.azstarnet.com
'''

from calibre.web.feeds.news import BasicNewsRecipe

class Azstarnet(BasicNewsRecipe):
    title = 'Arizona Daily Star'
    __author__ = 'Darko Miletic'
    description = 'news from Arizona'
    language = 'en'

    publisher = 'azstarnet.com'
    category = 'news, politics, Arizona, USA'
    delay = 1
    oldest_article = 1
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'
    needs_subscription = True
    remove_javascript = True

    html2lrf_options = [
        '--comment', description
        , '--category', category
        , '--publisher', publisher
    ]

    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'

    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
        if self.username is not None and self.password is not None:
            br.open('http://azstarnet.com/registration/retro.php')
            br.select_form(nr=1)
            br['email'] = self.username
            br['pass' ] = self.password
            br.submit()
        return br

    keep_only_tags = [dict(name='div', attrs={'id':'storycontent'})]

    remove_tags = [
        dict(name=['object','link','iframe','base','img'])
        ,dict(name='div',attrs={'class':'bannerinstory'})
    ]

    feeds = [(u'Tucson Region', u'http://rss.azstarnet.com/index.php?site=metro')]

    def preprocess_html(self, soup):
        soup.html['dir' ] = 'ltr'
        soup.html['lang'] = 'en-US'
        mtag = '\n<meta http-equiv="Content-Language" content="en-US"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">\n'
        soup.head.insert(0,mtag)
        for item in soup.findAll(style=True):
            del item['style']
        return soup

@ -1,69 +1,69 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
'''
b92.net
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe

class B92(BasicNewsRecipe):
    title = 'B92'
    __author__ = 'Darko Miletic'
    description = 'Dnevne vesti iz Srbije i sveta'
    publisher = 'B92'
    category = 'news, politics, Serbia'
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'cp1250'
    language = 'sr'

    lang = 'sr-Latn-RS'
    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'

    conversion_options = {
        'comment' : description
        , 'tags' : category
        , 'publisher' : publisher
        , 'language' : lang
    }

    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    keep_only_tags = [dict(name='table', attrs={'class':'maindocument'})]

    remove_tags = [
        dict(name='ul', attrs={'class':'comment-nav'})
        ,dict(name=['embed','link','base'] )
        ,dict(name='div', attrs={'class':'udokum'} )
    ]

    feeds = [
        (u'Vesti', u'http://www.b92.net/info/rss/vesti.xml')
        ,(u'Biz' , u'http://www.b92.net/info/rss/biz.xml' )
    ]

    def print_version(self, url):
        return url + '&version=print'

    def preprocess_html(self, soup):
        del soup.body['onload']
        for item in soup.findAll('font'):
            item.name='div'
            if item.has_key('size'):
                del item['size']
        attribs = [ 'style','font','valign'
                    ,'colspan','width','height'
                    ,'rowspan','summary','align'
                    ,'cellspacing','cellpadding'
                    ,'frames','rules','border'
                  ]
        for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
            item.name = 'div'
            for attrib in attribs:
                if item.has_key(attrib):
                    del item[attrib]
        return soup

@ -1,93 +1,93 @@
##
## web2lrf profile to download articles from Barrons.com
## can download subscriber-only content if username and
## password are supplied.
##
'''
'''

import re

from calibre.web.feeds.news import BasicNewsRecipe

class Barrons(BasicNewsRecipe):

    title = 'Barron\'s'
    max_articles_per_feed = 50
    needs_subscription = True
    language = 'en'

    __author__ = 'Kovid Goyal'
    description = 'Weekly publication for investors from the publisher of the Wall Street Journal'
    timefmt = ' [%a, %b %d, %Y]'
    use_embedded_content = False
    no_stylesheets = False
    match_regexps = ['http://online.barrons.com/.*?html\?mod=.*?|file:.*']
    conversion_options = {'linearize_tables': True}
    ##delay = 1

    ## Don't grab articles more than 7 days old
    oldest_article = 7

    preprocess_regexps = [(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
        [
            ## Remove anything before the body of the article.
            (r'<body.*?<!-- article start', lambda match: '<body><!-- article start'),

            ## Remove any insets from the body of the article.
            (r'<div id="inset".*?</div>.?</div>.?<p', lambda match : '<p'),

            ## Remove any reprint info from the body of the article.
            (r'<hr size.*?<p', lambda match : '<p'),

            ## Remove anything after the end of the article.
            (r'<!-- article end.*?</body>', lambda match : '</body>'),
        ]
    ]

    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
        if self.username is not None and self.password is not None:
            br.open('http://commerce.barrons.com/auth/login')
            br.select_form(name='login_form')
            br['user'] = self.username
            br['password'] = self.password
            br.submit()
        return br

    ## Use the print version of a page when available.
    def print_version(self, url):
        return url.replace('/article/', '/article_print/')

    ## Comment out the feeds you don't want retrieved.
    ## Because these feeds are sorted alphabetically when converted to LRF,
    ## you may want to number them to put them in the order you desire.
    def get_feeds(self):
        return [
            ('This Week\'s Magazine', 'http://online.barrons.com/xml/rss/3_7510.xml'),
            ('Online Exclusives', 'http://online.barrons.com/xml/rss/3_7515.xml'),
            ('Companies', 'http://online.barrons.com/xml/rss/3_7516.xml'),
            ('Markets', 'http://online.barrons.com/xml/rss/3_7517.xml'),
            ('Technology', 'http://online.barrons.com/xml/rss/3_7518.xml'),
            ('Funds/Q&A', 'http://online.barrons.com/xml/rss/3_7519.xml'),
        ]

    ## Logout of website
    ## NOT CURRENTLY WORKING
    # def cleanup(self):
    #     try:
    #         self.browser.set_debug_responses(True)
    #         import sys, logging
    #         logger = logging.getLogger("mechanize")
    #         logger.addHandler(logging.StreamHandler(sys.stdout))
    #         logger.setLevel(logging.INFO)
    #
    #         res = self.browser.open('http://online.barrons.com/logout')
    #     except:
    #         import traceback
    #         traceback.print_exc()

@ -1,35 +1,35 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2009, Huan Komrade T <huantnh at gmail.com>'
'''
bbc.co.uk
'''

from calibre.web.feeds.news import BasicNewsRecipe

class BBCVietnamese(BasicNewsRecipe):
    title = u'BBC Vietnamese'
    __author__ = 'Huan Komrade T'
    description = 'Vietnam news and current affairs from the British Broadcasting Corporation'
    no_stylesheets = True
    language = 'vi'

    encoding = 'utf-8'
    recursions = 0

    remove_tags = [dict(name='div', attrs={'class':'footer'})]
    extra_css = '.headline {font-size: x-large;} \n .fact { padding-top: 10pt }'

    feeds = [
        ('Index', 'http://www.bbc.co.uk/vietnamese/index.xml'),
        ('Vietnam', 'http://www.bbc.co.uk/vietnamese/vietnam/index.xml'),
        ('Business', 'http://www.bbc.co.uk/vietnamese/business/index.xml'),
        ('Culture', 'http://www.bbc.co.uk/vietnamese/culture/index.xml'),
        ('Football', 'http://www.bbc.co.uk/vietnamese/football/index.xml'),
        ('Forum', 'http://www.bbc.co.uk/vietnamese/forum/index.xml'),
        ('In Depth', 'http://www.bbc.co.uk/vietnamese/indepth/index.xml'),
    ]

    def print_version(self, url):
        return url.replace('http://www.bbc.co.uk/vietnamese/', 'http://www.bbc.co.uk/vietnamese/lg/')

@ -1,51 +1,51 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
beta.rs
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag

class Danas(BasicNewsRecipe):
    title = 'BETA'
    __author__ = 'Darko Miletic'
    description = 'Novinska Agencija'
    publisher = 'Beta'
    category = 'news, politics, Serbia'
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = False
    use_embedded_content = True
    language = 'sr'

    lang = 'sr-Latn-RS'
    direction = 'ltr'
    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'

    conversion_options = {
        'comment' : description
        , 'tags' : category
        , 'publisher' : publisher
        , 'language' : lang
        , 'pretty_print' : True
    }

    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    feeds = [
        (u'Vesti dana', u'http://www.beta.rs/rssvd.asp')
        ,(u'Ekonomija' , u'http://www.beta.rs/rssek.asp')
        ,(u'Sport' , u'http://www.beta.rs/rsssp.asp')
    ]

    def preprocess_html(self, soup):
        soup.html['lang'] = self.lang
        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
        mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
        soup.head.insert(0,mlang)
        soup.head.insert(1,mcharset)
        return self.adeify_images(soup)

@ -1,38 +1,37 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
beta.rs
'''

from calibre.web.feeds.news import BasicNewsRecipe

class Danas(BasicNewsRecipe):
    title = 'BETA - English'
    __author__ = 'Darko Miletic'
    description = 'Serbian news agency'
    publisher = 'Beta'
    category = 'news, politics, Serbia'
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = False
    use_embedded_content = True
    language = 'en'

    lang = 'en'

    conversion_options = {
        'comment' : description
        , 'tags' : category
        , 'publisher' : publisher
        , 'language' : lang
        , 'pretty_print' : True
    }

    feeds = [(u'News', u'http://www.beta.rs/rssen.asp')]

    def preprocess_html(self, soup):
        return self.adeify_images(soup)

@ -1,66 +1,65 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
'''
blic.rs
'''

import re
from calibre.web.feeds.news import BasicNewsRecipe

class Blic(BasicNewsRecipe):
    title = 'Blic'
    __author__ = 'Darko Miletic'
    description = 'Blic.co.yu online verzija najtiraznije novine u Srbiji donosi najnovije vesti iz Srbije i sveta, komentare, politicke analize, poslovne i ekonomske vesti, vesti iz regiona, intervjue, informacije iz kulture, reportaze, pokriva sve sportske dogadjaje, detaljan tv program, nagradne igre, zabavu, fenomenalni Blic strip, dnevni horoskop, arhivu svih dogadjaja'
    publisher = 'RINGIER d.o.o.'
    category = 'news, politics, Serbia'
    delay = 1
    oldest_article = 2
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False
    language = 'sr'

    lang = 'sr-Latn-RS'
    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif} '

    conversion_options = {
        'comment' : description
        , 'tags' : category
        , 'publisher' : publisher
        , 'language' : lang
    }

    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    keep_only_tags = [dict(name='div', attrs={'class':'single_news'})]

    feeds = [(u'Vesti', u'http://www.blic.rs/rssall.php')]

    remove_tags = [dict(name=['object','link'])]

    def print_version(self, url):
        rest_url = url.partition('?')[2]
        return u'http://www.blic.rs/_print.php?' + rest_url

    def preprocess_html(self, soup):
        attribs = [ 'style','font','valign'
                    ,'colspan','width','height'
                    ,'rowspan','summary','align'
                    ,'cellspacing','cellpadding'
                    ,'frames','rules','border'
                  ]
        for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
            item.name = 'div'
            for attrib in attribs:
                if item.has_key(attrib):
                    del item[attrib]
        return self.adeify_images(soup)

    def get_article_url(self, article):
        raw = article.get('link', None)
        return raw.replace('.co.yu','.rs')

@ -1,95 +1,95 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'

'''
borba.rs
'''

import re
from calibre.web.feeds.news import BasicNewsRecipe

class Borba(BasicNewsRecipe):
    title = 'Borba Online'
    __author__ = 'Darko Miletic'
    description = 'Dnevne novine Borba Online'
    publisher = 'IP Novine Borba'
    category = 'news, politics, Serbia'
    language = 'sr'

    lang = 'sr-Latn-RS'  # plain string; the original wrapped this locale code in _(), which would mark it for translation
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False
    cover_url = 'http://www.borba.rs/images/stories/novine/naslovna_v.jpg'
    INDEX = u'http://www.borba.rs/'
    extra_css = ' @font-face {font-family: "serif1"; src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif} .contentheading{font-size: x-large; font-weight: bold} .createdate{font-size: small; font-weight: bold} '

    conversion_options = {
        'comment' : description
        , 'tags' : category
        , 'publisher' : publisher
        , 'language' : lang
        , 'pretty_print' : True
    }

    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    keep_only_tags = [dict(name='div', attrs={'class':'main'})]

    remove_tags_after = dict(name='div',attrs={'id':'written_comments_title'})

    remove_tags = [
        dict(name=['object','link','iframe','base','img'])
        ,dict(name='div',attrs={'id':'written_comments_title'})
    ]

    feeds = [
        (u'Najnovije vesti', u'http://www.borba.rs/content/blogsection/28/105/')
        ,(u'Prvi plan' , u'http://www.borba.rs/content/blogsection/4/92/' )
        ,(u'Dogadjaji' , u'http://www.borba.rs/content/blogsection/21/83/' )
        ,(u'Ekonomija' , u'http://www.borba.rs/content/blogsection/5/35/' )
        ,(u'Komentari' , u'http://www.borba.rs/content/blogsection/23/94/' )
        ,(u'Svet' , u'http://www.borba.rs/content/blogsection/7/36/' )
        ,(u'Sport' , u'http://www.borba.rs/content/blogsection/6/37/' )
        ,(u'Fama' , u'http://www.borba.rs/content/blogsection/25/89/' )
        ,(u'B2 Dodatak' , u'http://www.borba.rs/content/blogsection/30/116/')
    ]

    def preprocess_html(self, soup):
        attribs = [ 'style','font','valign'
                    ,'colspan','width','height'
                    ,'rowspan','summary','align'
                    ,'cellspacing','cellpadding'
                    ,'frames','rules','border'
                  ]
        for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
            item.name = 'div'
            for attrib in attribs:
                if item.has_key(attrib):
                    del item[attrib]
        return soup

    def parse_index(self):
        totalfeeds = []
        lfeeds = self.get_feeds()
        for feedobj in lfeeds:
            feedtitle, feedurl = feedobj
            self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
            articles = []
            soup = self.index_to_soup(feedurl)
            for item in soup.findAll('a', attrs={'class':'contentpagetitle'}):
                url = item['href']
                title = self.tag_to_string(item)
                articles.append({
                    'title' :title
                    ,'date' :''
                    ,'url' :url
                    ,'description':''
                })
            totalfeeds.append((feedtitle, articles))
        return totalfeeds

@ -1,72 +1,72 @@
#!/usr/bin/env python

__license__   = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
elargentino.com
'''

from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag

class BsAsEconomico(BasicNewsRecipe):
    title                 = 'Buenos Aires Economico'
    __author__            = 'Darko Miletic'
    description           = 'Revista Argentina'
    publisher             = 'ElArgentino.com'
    category              = 'news, politics, economy, Argentina'
    oldest_article        = 2
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'utf-8'
    language              = 'es'

    lang      = 'es-AR'
    direction = 'ltr'
    INDEX     = 'http://www.elargentino.com/medios/121/Buenos-Aires-Economico.html'
    extra_css = ' .titulo{font-size: x-large; font-weight: bold} .volantaImp{font-size: small; font-weight: bold} '

    html2lrf_options = [
                          '--comment'  , description
                        , '--category' , category
                        , '--publisher', publisher
                        ]

    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} "'

    keep_only_tags = [dict(name='div', attrs={'class':'ContainerPop'})]

    remove_tags = [dict(name='link')]

    feeds = [(u'Articulos', u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=121&Content-Type=text/xml&ChannelDesc=Buenos%20Aires%20Econ%C3%B3mico')]

    def print_version(self, url):
        main, sep, article_part = url.partition('/nota-')
        article_id, rsep, rrest = article_part.partition('-')
        return u'http://www.elargentino.com/Impresion.aspx?Id=' + article_id

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        soup.html['lang'] = self.lang
        soup.html['dir' ] = self.direction
        mlang    = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
        mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
        soup.head.insert(0,mlang)
        soup.head.insert(1,mcharset)
        return soup

    def get_cover_url(self):
        cover_url  = None
        soup       = self.index_to_soup(self.INDEX)
        cover_item = soup.find('div',attrs={'class':'colder'})
        if cover_item:
            clean_url = self.image_url_processor(None,cover_item.div.img['src'])
            cover_url = 'http://www.elargentino.com' + clean_url + '&height=600'
        return cover_url

    def image_url_processor(self, baseurl, url):
        base, sep, rest  = url.rpartition('?Id=')
        img, sep2, rrest = rest.partition('&')
        return base + sep + img

@ -1,46 +1,46 @@
#!/usr/bin/env python

__license__   = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
chicagobreakingnews.com
'''

from calibre.web.feeds.news import BasicNewsRecipe

class ChicagoBreakingNews(BasicNewsRecipe):
    title                 = 'Chicago Breaking News'
    __author__            = 'Darko Miletic'
    description           = 'Breaking News from Chicago'
    oldest_article        = 1
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = True
    publisher             = 'Chicago Breaking News'
    category              = 'news, politics, USA, Chicago'
    encoding              = 'utf8'
    language              = 'en'

    html2lrf_options = [
                          '--comment', description
                        , '--category', category
                        , '--publisher', publisher
                        ]

    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'

    feeds = [(u'Breaking news', u'http://feeds2.feedburner.com/ChicagoBreakingNews/')]

    def preprocess_html(self, soup):
        links = soup.findAll('a')
        for item in soup.findAll('a'):
            if item['href'].find('http://feedads.googleadservices.com') > -1:
                item.extract()
        for item in soup.findAll(style=True):
            del item['style']
        for item in soup.findAll(color=True):
            del item['color']
        for item in soup.findAll(size=True):
            del item['size']
        return soup

@ -3,11 +3,7 @@ __license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import re
from urlparse import urlparse, urlunparse
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ptempfile import PersistentTemporaryFile
from threading import RLock

class ChicagoTribune(BasicNewsRecipe):

@ -1,73 +1,73 @@
#!/usr/bin/env python

__license__   = 'GPL v3'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
'''
clarin.com
'''

from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag

class Clarin(BasicNewsRecipe):
    title                 = 'Clarin'
    __author__            = 'Darko Miletic'
    description           = 'Noticias de Argentina y mundo'
    publisher             = 'Grupo Clarin'
    category              = 'news, politics, Argentina'
    oldest_article        = 2
    max_articles_per_feed = 100
    use_embedded_content  = False
    no_stylesheets        = True
    cover_url             = strftime('http://www.clarin.com/diario/%Y/%m/%d/portada.jpg')
    remove_javascript     = True
    encoding              = 'cp1252'
    language              = 'es'

    lang      = 'es-AR'
    direction = 'ltr'
    extra_css = ' .Txt{ font-family: sans-serif } .Volan{ font-family: sans-serif; font-size: x-small} .Pie{ font-family: sans-serif; font-size: x-small} .Copete{font-family: sans-serif; font-size: large} .Hora{font-family: sans-serif; font-size: large} .Autor{font-family: sans-serif; font-size: small} '

    html2lrf_options = [
                          '--comment', description
                        , '--category', category
                        , '--publisher', publisher
                        ]

    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\npretty_print=True\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} "'

    remove_tags = [
                     dict(name='a'  , attrs={'class':'Imp'  })
                    ,dict(name='div', attrs={'class':'Perma'})
                    ,dict(name='h1' , text='Imprimir')
                  ]

    feeds = [
              (u'Ultimo Momento', u'http://www.clarin.com/diario/hoy/um/sumariorss.xml')
             ,(u'El Pais'       , u'http://www.clarin.com/diario/hoy/elpais.xml'       )
             ,(u'Opinion'       , u'http://www.clarin.com/diario/hoy/opinion.xml'      )
             ,(u'El Mundo'      , u'http://www.clarin.com/diario/hoy/elmundo.xml'      )
             ,(u'Sociedad'      , u'http://www.clarin.com/diario/hoy/sociedad.xml'     )
             ,(u'La Ciudad'     , u'http://www.clarin.com/diario/hoy/laciudad.xml'     )
             ,(u'Policiales'    , u'http://www.clarin.com/diario/hoy/policiales.xml'   )
             ,(u'Deportes'      , u'http://www.clarin.com/diario/hoy/deportes.xml'     )
            ]

    def print_version(self, url):
        rest  = url.partition('-0')[-1]
        lmain = rest.partition('.')[0]
        lurl  = u'http://www.servicios.clarin.com/notas/jsp/clarin/v9/notas/imprimir.jsp?pagid=' + lmain
        return lurl

    def preprocess_html(self, soup):
        soup.html['lang'] = self.lang
        soup.html['dir' ] = self.direction
        mlang    = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
        mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
        soup.head.insert(0,mlang)
        soup.head.insert(1,mcharset)
        for item in soup.findAll(style=True):
            del item['style']
        return soup

@ -1,46 +1,46 @@
#!/usr/bin/env python

__license__   = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
climateprogress.org
'''

from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag

class ClimateProgress(BasicNewsRecipe):
    title                 = 'Climate Progress'
    __author__            = 'Darko Miletic'
    description           = "An insider's view of climate science, politics and solutions"
    publisher             = 'Climate Progress'
    category              = 'news, ecology, climate, blog'
    oldest_article        = 7
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = True
    encoding              = 'utf-8'
    language              = 'en'

    lang      = 'en-US'
    direction = 'ltr'

    html2lrf_options = [
                          '--comment', description
                        , '--category', category
                        , '--publisher', publisher
                        ]

    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'

    feeds = [(u'Posts', u'http://feeds.feedburner.com/climateprogress/lCrX')]

    def preprocess_html(self, soup):
        soup.html['lang'] = self.lang
        soup.html['dir' ] = self.direction
        mlang    = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
        mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
        soup.head.insert(0,mlang)
        soup.head.insert(1,mcharset)
        return self.adeify_images(soup)

@ -1,41 +1,41 @@
#!/usr/bin/env python

__license__   = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
www.codinghorror.com/blog/
'''

from calibre.web.feeds.news import BasicNewsRecipe

class CodingHorror(BasicNewsRecipe):
    title                 = 'Coding Horror'
    __author__            = 'Darko Miletic'
    description           = 'programming and human factors - Jeff Atwood'
    category              = 'blog, programming'
    publisher             = 'Jeff Atwood'
    language              = 'en'

    author                = 'Jeff Atwood'
    oldest_article        = 30
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = True
    encoding              = 'cp1252'

    html2lrf_options = [
                          '--comment'  , description
                        , '--category' , category
                        , '--publisher', publisher
                        , '--author'   , author
                        ]

    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nauthors="' + author + '"'

    remove_tags = [
                     dict(name=['object','link'])
                    ,dict(name='div',attrs={'class':'feedflare'})
                  ]

    feeds = [(u'Articles', u'http://feeds2.feedburner.com/codinghorror')]

@ -1,46 +1,46 @@
#!/usr/bin/env python

__license__   = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
www.corriere.it/english
'''

from calibre.web.feeds.news import BasicNewsRecipe

class Corriere_en(BasicNewsRecipe):
    title                 = 'Corriere della Sera in English'
    __author__            = 'Darko Miletic'
    description           = 'News from Milan and Italy'
    oldest_article        = 15
    publisher             = 'Corriere della Sera'
    category              = 'news, politics, Italy'
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'cp1252'
    remove_javascript     = True
    language              = 'en'

    html2lrf_options = [
                          '--comment', description
                        , '--category', category
                        , '--publisher', publisher
                        , '--ignore-tables'
                        ]

    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'

    keep_only_tags = [dict(name='div', attrs={'class':['news-dettaglio article','article']})]

    remove_tags = [
                     dict(name=['base','object','link','embed','img'])
                    ,dict(name='div', attrs={'class':'news-goback'})
                    ,dict(name='ul', attrs={'class':'toolbar'})
                  ]

    remove_tags_after = dict(name='p', attrs={'class':'footnotes'})

    feeds = [(u'Italian Life', u'http://www.corriere.it/rss/english.xml')]

@ -1,56 +1,56 @@
#!/usr/bin/env python

__license__   = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
www.corriere.it
'''

from calibre.web.feeds.news import BasicNewsRecipe

class Corriere_it(BasicNewsRecipe):
    title                 = 'Corriere della Sera'
    __author__            = 'Darko Miletic'
    description           = 'News from Milan and Italy'
    oldest_article        = 7
    publisher             = 'Corriere della Sera'
    category              = 'news, politics, Italy'
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'cp1252'
    remove_javascript     = True
    language              = 'it'

    html2lrf_options = [
                          '--comment', description
                        , '--category', category
                        , '--publisher', publisher
                        , '--ignore-tables'
                        ]

    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'

    keep_only_tags = [dict(name='div', attrs={'class':['news-dettaglio article','article']})]

    remove_tags = [
                     dict(name=['base','object','link','embed','img'])
                    ,dict(name='div', attrs={'class':'news-goback'})
                    ,dict(name='ul', attrs={'class':'toolbar'})
                  ]

    remove_tags_after = dict(name='p', attrs={'class':'footnotes'})

    feeds = [
              (u'Ultimora'  , u'http://www.corriere.it/rss/ultimora.xml'  )
             ,(u'Cronache'  , u'http://www.corriere.it/rss/cronache.xml'  )
             ,(u'Economia'  , u'http://www.corriere.it/rss/economia.xml'  )
             ,(u'Editoriali', u'http://www.corriere.it/rss/editoriali.xml')
             ,(u'Esteri'    , u'http://www.corriere.it/rss/esteri.xml'    )
             ,(u'Politica'  , u'http://www.corriere.it/rss/politica.xml'  )
             ,(u'Salute'    , u'http://www.corriere.it/rss/salute.xml'    )
             ,(u'Scienze'   , u'http://www.corriere.it/rss/scienze.xml'   )
             ,(u'Spettacolo', u'http://www.corriere.it/rss/spettacoli.xml')
             ,(u'Sport'     , u'http://www.corriere.it/rss/sport.xml'     )
            ]

@ -7,7 +7,6 @@ Courrier International
'''

import re
from datetime import date
from calibre.web.feeds.news import BasicNewsRecipe

class CourrierInternational(BasicNewsRecipe):
@ -21,12 +20,12 @@ class CourrierInternational(BasicNewsRecipe):
    no_stylesheets = True

    html2lrf_options = ['--base-font-size', '10']

    feeds = [
        # Some articles requiring subscription fails on download.
        ('A la Une', 'http://www.courrierinternational.com/rss/rss_a_la_une.xml'),
    ]

    preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE|re.DOTALL), i[1]) for i in
        [
        #Handle Depeches
@ -35,8 +34,8 @@ class CourrierInternational(BasicNewsRecipe):
        (r'.*<td [^>]*>(Courrier international.*?) <td width="10"><img src="/img/espaceur.gif"></td>.*', lambda match : '<html><body><table><tr><td>'+match.group(1)+'</body></html>'),
        ]
    ]

    def print_version(self, url):
        return re.sub('/[a-zA-Z]+\.asp','/imprimer.asp' ,url)

@ -1,62 +1,62 @@
#!/usr/bin/env python

__license__   = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
'''
criticadigital.com
'''

from calibre.web.feeds.news import BasicNewsRecipe

class CriticaDigital(BasicNewsRecipe):
    title                 = 'Critica de la Argentina'
    __author__            = 'Darko Miletic'
    description           = 'Noticias de Argentina'
    oldest_article        = 2
    max_articles_per_feed = 100
    language              = 'es'

    no_stylesheets       = True
    use_embedded_content = False
    encoding             = 'cp1252'

    html2lrf_options = [
                          '--comment'  , description
                        , '--category' , 'news, Argentina'
                        , '--publisher', title
                        ]

    keep_only_tags = [
                        dict(name='div', attrs={'class':'bloqueTitulosNoticia'})
                       ,dict(name='div', attrs={'id':'c453-1'})
                     ]

    remove_tags = [
                     dict(name='div', attrs={'class':'box300'})
                    ,dict(name='div', style=True)
                    ,dict(name='div', attrs={'class':'titcomentario'})
                    ,dict(name='div', attrs={'class':'comentario'})
                    ,dict(name='div', attrs={'class':'paginador'})
                  ]

    feeds = [
              (u'Politica'    , u'http://www.criticadigital.com/herramientas/rss.php?ch=politica'    )
             ,(u'Economia'    , u'http://www.criticadigital.com/herramientas/rss.php?ch=economia'    )
             ,(u'Deportes'    , u'http://www.criticadigital.com/herramientas/rss.php?ch=deportes'    )
             ,(u'Espectaculos', u'http://www.criticadigital.com/herramientas/rss.php?ch=espectaculos')
             ,(u'Mundo'       , u'http://www.criticadigital.com/herramientas/rss.php?ch=mundo'       )
             ,(u'Policiales'  , u'http://www.criticadigital.com/herramientas/rss.php?ch=policiales'  )
             ,(u'Sociedad'    , u'http://www.criticadigital.com/herramientas/rss.php?ch=sociedad'    )
             ,(u'Salud'       , u'http://www.criticadigital.com/herramientas/rss.php?ch=salud'       )
             ,(u'Tecnologia'  , u'http://www.criticadigital.com/herramientas/rss.php?ch=tecnologia'  )
             ,(u'Santa Fe'    , u'http://www.criticadigital.com/herramientas/rss.php?ch=santa_fe'    )
            ]

    def get_cover_url(self):
        cover_url = None
        index     = 'http://www.criticadigital.com/impresa/'
        soup      = self.index_to_soup(index)
        link_item = soup.find('div',attrs={'class':'tapa'})
        if link_item:
            cover_url = index + link_item.img['src']
        return cover_url

@ -1,45 +1,44 @@
#!/usr/bin/env python

__license__   = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
newyorker.com
'''

from calibre.web.feeds.news import BasicNewsRecipe

class CubaDebate(BasicNewsRecipe):
    title                 = 'CubaDebate'
    __author__            = 'Darko Miletic'
    description           = 'Contra el Terorismo Mediatico'
    oldest_article        = 15
    language              = 'es'

    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    publisher             = 'Cubadebate'
    category              = 'news, politics, Cuba'
    encoding              = 'utf-8'
    extra_css             = ' #BlogTitle{font-size: x-large; font-weight: bold} '

    conversion_options = {
                           'comments'     : description
                          ,'tags'         : category
                          ,'language'     : 'es'
                          ,'publisher'    : publisher
                          ,'pretty_print' : True
                         }

    keep_only_tags    = [dict(name='div', attrs={'id':'Outline'})]
    remove_tags_after = dict(name='div',attrs={'id':'BlogContent'})
    remove_tags       = [dict(name='link')]

    feeds = [(u'Articulos', u'http://www.cubadebate.cu/feed/')]

    def print_version(self, url):
        return url + 'print/'

    def preprocess_html(self, soup):
        return self.adeify_images(soup)

@ -1,34 +1,34 @@
from calibre.web.feeds.news import BasicNewsRecipe

class TheDailyMail(BasicNewsRecipe):
    title          = u'The Daily Mail'
    oldest_article = 2
    language       = 'en'

    author                 = 'RufusA'
    simultaneous_downloads = 1
    max_articles_per_feed  = 50

    extra_css = 'h1 {text-align: left;}'

    remove_tags        = [ dict(name='ul', attrs={'class':'article-icons-links'}) ]
    remove_tags_after  = dict(name='h3', attrs={'class':'social-links-title'})
    remove_tags_before = dict(name='div', attrs={'id':'content'})
    no_stylesheets     = True

    feeds = [
        (u'Home', u'http://www.dailymail.co.uk/home/index.rss'),
        (u'News', u'http://www.dailymail.co.uk/news/index.rss'),
        (u'Sport', u'http://www.dailymail.co.uk/sport/index.rss'),
        (u'TV and Showbiz', u'http://www.dailymail.co.uk/tvshowbiz/index.rss'),
        (u'Femail', u'http://www.dailymail.co.uk/femail/index.rss'),
        (u'Health', u'http://www.dailymail.co.uk/health/index.rss'),
        (u'Science and Technology', u'http://www.dailymail.co.uk/sciencetech/index.rss'),
        (u'Money', u'http://www.dailymail.co.uk/money/index.rss'),
        (u'Property', u'http://www.dailymail.co.uk/property/index.rss'),
        (u'Motoring', u'http://www.dailymail.co.uk/motoring/index.rss'),
        (u'Travel', u'http://www.dailymail.co.uk/travel/index.rss')]

    def print_version(self, url):
        main = url.partition('?')[0]
        return main + '?printingPage=true'

@ -1,62 +1,62 @@
#!/usr/bin/env python

__license__   = 'GPL v3'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
'''
danas.rs
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag

class Danas(BasicNewsRecipe):
    title                 = 'Danas'
    __author__            = 'Darko Miletic'
    description           = 'Vesti'
    publisher             = 'Danas d.o.o.'
    category              = 'news, politics, Serbia'
    oldest_article        = 2
    max_articles_per_feed = 100
    no_stylesheets        = False
    use_embedded_content  = False
    language              = 'sr'

    lang      = 'sr-Latn-RS'
    direction = 'ltr'
    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'

    conversion_options = {
                           'comment'      : description
                         , 'tags'         : category
                         , 'publisher'    : publisher
                         , 'language'     : lang
                         , 'pretty_print' : True
                         }

    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    keep_only_tags = [dict(name='div', attrs={'id':'left'})]
    remove_tags = [
                     dict(name='div', attrs={'class':['width_1_4','metaClanka','baner']})
                    ,dict(name='div', attrs={'id':'comments'})
                    ,dict(name=['object','link'])
                  ]

    feeds = [(u'Vesti', u'http://www.danas.rs/rss/rss.asp')]

    def preprocess_html(self, soup):
        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
        soup.head.insert(0,mlang)
        attribs = [ 'style','font','valign'
                   ,'colspan','width','height'
                   ,'rowspan','summary','align'
                   ,'cellspacing','cellpadding'
                   ,'frames','rules','border'
                  ]
        for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
            item.name = 'div'
            for attrib in attribs:
                if item.has_key(attrib):
                    del item[attrib]
        return soup

@ -1,76 +1,76 @@
#!/usr/bin/env python

__license__   = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
www.nieuwsblad.be
'''
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag

class DeGentenaarOnline(BasicNewsRecipe):
    title                 = 'De Gentenaar Online'
    __author__            = 'Darko Miletic'
    description           = 'News from Belgium in Dutch'
    publisher             = 'De Gentenaar'
    category              = 'news, politics, Belgium'
    oldest_article        = 2
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'utf-8'
    language              = 'nl'

    lang      = 'nl-BE'
    direction = 'ltr'

    html2lrf_options = [
                          '--comment'  , description
                        , '--category' , category
                        , '--publisher', publisher
                        ]

    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} "'

    keep_only_tags = [dict(name='span', attrs={'id':['lblArticleTitle','lblArticleIntroduction','lblArticleMainText']})]
    remove_tags    = [dict(name=['embed','object'])]

    feeds = [
              (u'Snelnieuws' , u'http://feeds.nieuwsblad.be/nieuws/snelnieuws'    )
             ,(u'Binnenland' , u'http://feeds.nieuwsblad.be/nieuws/binnenland'    )
             ,(u'Buitenland' , u'http://feeds.nieuwsblad.be/nieuwsblad/buitenland')
             ,(u'Economie'   , u'http://feeds.nieuwsblad.be/economie/home'        )
             ,(u'Economie'   , u'http://feeds.nieuwsblad.be/economie/home'        )
             ,(u'Algemeen'   , u'http://feeds.nieuwsblad.be/life/algemeen'        )
             ,(u'Film'       , u'http://feeds.nieuwsblad.be/life/film'            )
             ,(u'Boek'       , u'http://feeds.nieuwsblad.be/life/boeken'          )
             ,(u'Muziek'     , u'http://feeds.nieuwsblad.be/life/muziek'          )
             ,(u'Podium'     , u'http://feeds.nieuwsblad.be/life/podium'          )
             ,(u'TV & radio' , u'http://feeds.nieuwsblad.be/life/tv'              )
            ]

    def print_version(self, url):
        return url.replace('/Detail.aspx?articleid','/PrintArticle.aspx?ArticleID')

    def get_article_url(self, article):
        return article.get('guid', None)

    def preprocess_html(self, soup):
        del soup.body['onload']
        for item in soup.findAll(style=True):
            del item['style']
        for item in soup.findAll('span'):
            item.name='div'
            if item.has_key('id') and item['id'] == 'lblArticleTitle':
                item.name='h3'

        soup.html['lang'] = self.lang
        soup.html['dir' ] = self.direction
        mlang    = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
        mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
        soup.head.insert(0,mlang)
        soup.head.insert(1,mcharset)
        return soup

@ -1,69 +1,69 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

__license__   = 'GPL v3'
__copyright__ = '2009, Gerhard Aigner <gerhard.aigner at gmail.com>'

''' http://www.derstandard.at - Austrian Newspaper '''
import re
from calibre.web.feeds.news import BasicNewsRecipe

class DerStandardRecipe(BasicNewsRecipe):
    title                = u'derStandard'
    __author__           = 'Gerhard Aigner'
    description          = u'Nachrichten aus Österreich'
    publisher            = 'derStandard.at'
    category             = 'news, politics, nachrichten, Austria'
    use_embedded_content = False
    remove_empty_feeds   = True
    lang                 = 'de-AT'
    no_stylesheets       = True
    encoding             = 'utf-8'
    language             = 'de'

    recursions            = 0
    oldest_article        = 1
    max_articles_per_feed = 100

    html2lrf_options = [
                          '--comment'  , description
                        , '--category' , category
                        , '--publisher', publisher
                        ]

    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'

    feeds = [(u'International', u'http://derstandard.at/?page=rss&ressort=internationalpolitik'),
             (u'Inland', u'http://derstandard.at/?page=rss&ressort=innenpolitik'),
             (u'Wirtschaft', u'http://derstandard.at/?page=rss&ressort=investor'),
             (u'Web', u'http://derstandard.at/?page=rss&ressort=webstandard'),
             (u'Sport', u'http://derstandard.at/?page=rss&ressort=sport'),
             (u'Panorama', u'http://derstandard.at/?page=rss&ressort=panorama'),
             (u'Etat', u'http://derstandard.at/?page=rss&ressort=etat'),
             (u'Kultur', u'http://derstandard.at/?page=rss&ressort=kultur'),
             (u'Wissenschaft', u'http://derstandard.at/?page=rss&ressort=wissenschaft'),
             (u'Gesundheit', u'http://derstandard.at/?page=rss&ressort=gesundheit'),
             (u'Bildung', u'http://derstandard.at/?page=rss&ressort=subildung')]
    remove_tags = [dict(name='div'), dict(name='a'), dict(name='link'), dict(name='meta'),
                   dict(name='form',attrs={'name':'sitesearch'}), dict(name='hr')]
    preprocess_regexps = [
        (re.compile(r'\[[\d]*\]', re.DOTALL|re.IGNORECASE), lambda match: ''),
        (re.compile(r'bgcolor="#\w{3,6}"', re.DOTALL|re.IGNORECASE), lambda match: '')
    ]

    def print_version(self, url):
        return url.replace('?id=', 'txt/?id=')

    def get_article_url(self, article):
        '''if the article links to a index page (ressort) or a picture gallery
        (ansichtssache), don't add it'''
        if (article.link.count('ressort') > 0 or article.title.lower().count('ansichtssache') > 0):
            return None
        return article.link

    def preprocess_html(self, soup):
        soup.html['xml:lang'] = self.lang
        soup.html['lang'] = self.lang
        mtag = '<meta http-equiv="Content-Type" content="text/html; charset=' + self.encoding + '">'
        soup.head.insert(0,mtag)
        return soup

@@ -1,72 +1,72 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
elargentino.com
'''

from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag

class Diagonales(BasicNewsRecipe):
    title = 'Diagonales'
    __author__ = 'Darko Miletic'
    description = 'El nuevo diario de La Plata'
    publisher = 'ElArgentino.com'
    category = 'news, politics, Argentina, La Plata'
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'
    language = 'es'

    lang = 'es-AR'
    direction = 'ltr'
    INDEX = 'http://www.elargentino.com/medios/122/Diagonales.html'
    extra_css = ' .titulo{font-size: x-large; font-weight: bold} .volantaImp{font-size: small; font-weight: bold} '

    html2lrf_options = [
                          '--comment' , description
                        , '--category' , category
                        , '--publisher', publisher
                        ]

    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} "'

    keep_only_tags = [dict(name='div', attrs={'class':'ContainerPop'})]

    remove_tags = [dict(name='link')]

    feeds = [(u'Articulos', u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=122&Content-Type=text/xml&ChannelDesc=Diagonales')]

    def print_version(self, url):
        main, sep, article_part = url.partition('/nota-')
        article_id, rsep, rrest = article_part.partition('-')
        return u'http://www.elargentino.com/Impresion.aspx?Id=' + article_id

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        soup.html['lang'] = self.lang
        soup.html['dir' ] = self.direction
        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
        mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
        soup.head.insert(0,mlang)
        soup.head.insert(1,mcharset)
        return soup

    def get_cover_url(self):
        cover_url = None
        soup = self.index_to_soup(self.INDEX)
        cover_item = soup.find('div',attrs={'class':'colder'})
        if cover_item:
            clean_url = self.image_url_processor(None,cover_item.div.img['src'])
            cover_url = 'http://www.elargentino.com' + clean_url + '&height=600'
        return cover_url

    def image_url_processor(self, baseurl, url):
        base, sep, rest = url.rpartition('?Id=')
        img, sep2, rrest = rest.partition('&')
        return base + sep + img

@@ -1,73 +1,73 @@
# -*- coding: utf-8 -*-

__license__ = 'GPL v3'
__copyright__ = '2009, Gerhard Aigner <gerhard.aigner at gmail.com>'

''' http://www.diepresse.at - Austrian Newspaper '''

import re
from calibre.web.feeds.news import BasicNewsRecipe

class DiePresseRecipe(BasicNewsRecipe):
    title = u'diePresse'
    __author__ = 'Gerhard Aigner'
    description = u'DiePresse.com - Die Online-Ausgabe der Österreichischen Tageszeitung Die Presse.'
    publisher = 'DiePresse.com'
    category = 'news, politics, nachrichten, Austria'
    use_embedded_content = False
    remove_empty_feeds = True
    lang = 'de-AT'
    no_stylesheets = True
    encoding = 'ISO-8859-1'
    language = 'de'

    recursions = 0
    oldest_article = 1
    max_articles_per_feed = 100

    html2lrf_options = [
                          '--comment' , description
                        , '--category' , category
                        , '--publisher', publisher
                        ]

    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'

    preprocess_regexps = [
        (re.compile(r'Textversion', re.DOTALL), lambda match: ''),
    ]

    remove_tags = [dict(name='hr'),
                   dict(name='br'),
                   dict(name='small'),
                   dict(name='img'),
                   dict(name='div', attrs={'class':'textnavi'}),
                   dict(name='h1', attrs={'class':'titel'}),
                   dict(name='a', attrs={'class':'print'}),
                   dict(name='div', attrs={'class':'hline'})]

    feeds = [(u'Politik', u'http://diepresse.com/rss/Politik'),
             (u'Wirtschaft', u'http://diepresse.com/rss/Wirtschaft'),
             (u'Europa', u'http://diepresse.com/rss/EU'),
             (u'Panorama', u'http://diepresse.com/rss/Panorama'),
             (u'Sport', u'http://diepresse.com/rss/Sport'),
             (u'Kultur', u'http://diepresse.com/rss/Kultur'),
             (u'Leben', u'http://diepresse.com/rss/Leben'),
             (u'Tech', u'http://diepresse.com/rss/Tech'),
             (u'Wissenschaft', u'http://diepresse.com/rss/Science'),
             (u'Bildung', u'http://diepresse.com/rss/Bildung'),
             (u'Gesundheit', u'http://diepresse.com/rss/Gesundheit'),
             (u'Recht', u'http://diepresse.com/rss/Recht'),
             (u'Spectrum', u'http://diepresse.com/rss/Spectrum'),
             (u'Meinung', u'http://diepresse.com/rss/Meinung')]

    def print_version(self, url):
        return url.replace('home','text/home')

    def preprocess_html(self, soup):
        soup.html['xml:lang'] = self.lang
        soup.html['lang'] = self.lang
        mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
        soup.head.insert(0,mtag)
        return soup

@@ -1,69 +1,69 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'

'''
dnevniavaz.ba
'''

import re
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
from calibre.ebooks.BeautifulSoup import Tag

class DnevniAvaz(BasicNewsRecipe):
    title = 'Dnevni Avaz'
    __author__ = 'Darko Miletic'
    description = 'Latest news from Bosnia'
    publisher = 'Dnevni Avaz'
    category = 'news, politics, Bosnia and Herzegovina'
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False
    remove_javascript = True
    cover_url = 'http://www.dnevniavaz.ba/img/logo.gif'
    lang = 'bs-BA'
    language = 'bs'

    direction = 'ltr'

    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'

    conversion_options = {
                          'comment'      : description
                        , 'tags'         : category
                        , 'publisher'    : publisher
                        , 'language'     : lang
                        , 'pretty_print' : True
                        }

    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    keep_only_tags = [dict(name='div', attrs={'id':['fullarticle-title','fullarticle-leading','fullarticle-date','fullarticle-text','articleauthor']})]

    remove_tags = [dict(name=['object','link','base'])]

    feeds = [
              (u'Najnovije'     , u'http://www.dnevniavaz.ba/rss/novo'     )
             ,(u'Najpopularnije', u'http://www.dnevniavaz.ba/rss/popularno')
            ]

    def replace_tagname(self,soup,tagname,tagid,newtagname):
        headtag = soup.find(tagname,attrs={'id':tagid})
        if headtag:
            headtag.name = newtagname
        return

    def preprocess_html(self, soup):
        soup.html['xml:lang'] = self.lang
        soup.html['lang'] = self.lang
        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
        mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
        soup.head.insert(0,mlang)
        soup.head.insert(1,mcharset)
        self.replace_tagname(soup,'div','fullarticle-title'  ,'h1')
        self.replace_tagname(soup,'div','fullarticle-leading','h3')
        self.replace_tagname(soup,'div','fullarticle-date'   ,'h5')
        return self.adeify_images(soup)

@@ -1,75 +1,75 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'

'''
dnevnik.hr
'''

import re
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
from calibre.ebooks.BeautifulSoup import Tag

class DnevnikCro(BasicNewsRecipe):
    title = 'Dnevnik - Hr'
    __author__ = 'Darko Miletic'
    description = "Vijesti iz Hrvatske"
    publisher = 'Dnevnik.hr'
    category = 'news, politics, Croatia'
    oldest_article = 2
    max_articles_per_feed = 100
    delay = 4
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False
    language = 'hr'

    lang = 'hr-HR'
    direction = 'ltr'
    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'

    conversion_options = {
                          'comment'      : description
                        , 'tags'         : category
                        , 'publisher'    : publisher
                        , 'language'     : lang
                        , 'pretty_print' : True
                        }

    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    keep_only_tags = [dict(name='div', attrs={'id':'article'})]

    remove_tags = [
                    dict(name=['object','link','embed'])
                   ,dict(name='div', attrs={'class':'menu'})
                   ,dict(name='div', attrs={'id':'video'})
                  ]

    remove_tags_after = dict(name='div', attrs={'id':'content'})

    feeds = [(u'Vijesti', u'http://rss.dnevnik.hr/index.rss')]

    def preprocess_html(self, soup):
        soup.html['lang'] = self.lang
        soup.html['dir' ] = self.direction

        attribs = [ 'style','font','valign'
                   ,'colspan','width','height'
                   ,'rowspan','summary','align'
                   ,'cellspacing','cellpadding'
                   ,'frames','rules','border'
                  ]
        for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
            item.name = 'div'
            for attrib in attribs:
                if item.has_key(attrib):
                    del item[attrib]

        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
        mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
        soup.head.insert(0,mlang)
        soup.head.insert(1,mcharset)
        return self.adeify_images(soup)

@@ -1,59 +1,59 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'

'''
e-novine.com
'''

import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
from calibre.ebooks.BeautifulSoup import Tag

class E_novine(BasicNewsRecipe):
    title = 'E-Novine'
    __author__ = 'Darko Miletic'
    description = 'News from Serbia'
    publisher = 'E-novine'
    category = 'news, politics, Balcans'
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'cp1250'
    use_embedded_content = False
    language = 'sr'

    lang = 'sr'
    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'

    conversion_options = {
                          'comment'      : description
                        , 'tags'         : category
                        , 'publisher'    : publisher
                        , 'language'     : lang
                        , 'pretty_print' : True
                        }

    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    keep_only_tags = [dict(name='div', attrs={'id':['css_47_0_2844H']})]

    remove_tags = [dict(name=['object','link','embed','iframe'])]

    feeds = [(u'Sve vesti', u'http://www.e-novine.com/rss/e-novine.xml' )]

    def preprocess_html(self, soup):
        soup.html['xml:lang'] = self.lang
        soup.html['lang'] = self.lang
        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
        soup.head.insert(0,mlang)
        for item in soup.findAll(style=True):
            del item['style']
        ftag = soup.find('div', attrs={'id':'css_47_0_2844H'})
        if ftag:
            it = ftag.div
            it.extract()
            ftag.div.extract()
            ftag.insert(0,it)
        return soup

@@ -1,32 +1,32 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
EcoGeek.org
'''

from calibre.web.feeds.news import BasicNewsRecipe

class EcoGeek(BasicNewsRecipe):
    title = 'EcoGeek'
    __author__ = 'Darko Miletic'
    description = 'EcoGeek - Technology for the Environment Blog Feed'
    publisher = 'EcoGeek'
    language = 'en'

    category = 'news, ecology, blog'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = True

    html2lrf_options = [
                          '--comment', description
                        , '--category', category
                        , '--publisher', publisher
                        ]

    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'

    feeds = [(u'Posts', u'http://feeds2.feedburner.com/EcoGeek')]

@@ -1,62 +1,61 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
emol.com
'''

from calibre.web.feeds.news import BasicNewsRecipe

class ElMercurio(BasicNewsRecipe):
    title = 'El Mercurio online'
    __author__ = 'Darko Miletic'
    description = 'El sitio de noticias online de Chile'
    publisher = 'El Mercurio'
    category = 'news, politics, Chile'
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'cp1252'
    cover_url = 'http://www.emol.com/especiales/logo_emol/logo_emol.gif'
    remove_javascript = True
    use_embedded_content = False

    html2lrf_options = [
                          '--comment', description
                        , '--category', category
                        , '--publisher', publisher
                        ]

    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'

    keep_only_tags = [
                       dict(name='div', attrs={'class':'despliegue-txt_750px'})
                      ,dict(name='div', attrs={'id':'div_cuerpo_participa'})
                     ]

    remove_tags = [
                    dict(name='div', attrs={'class':'contenedor_despliegue-col-left300'})
                   ,dict(name='div', attrs={'id':['div_centro_dn_opc','div_cabezera','div_secciones','div_contenidos','div_pie','nav']})
                  ]

    feeds = [
              (u'Noticias de ultima hora', u'http://www.emol.com/rss20/rss.asp?canal=0')
             ,(u'Nacional', u'http://www.emol.com/rss20/rss.asp?canal=1')
             ,(u'Mundo', u'http://www.emol.com/rss20/rss.asp?canal=2')
             ,(u'Deportes', u'http://www.emol.com/rss20/rss.asp?canal=4')
             ,(u'Magazine', u'http://www.emol.com/rss20/rss.asp?canal=6')
             ,(u'Tecnologia', u'http://www.emol.com/rss20/rss.asp?canal=5')
             ,(u'La Musica', u'http://www.emol.com/rss20/rss.asp?canal=7')
            ]

    def preprocess_html(self, soup):
        mtag = '<meta http-equiv="Content-Language" content="es-CL"/>'
        soup.head.insert(0,mtag)
        for item in soup.findAll(style=True):
            del item['style']
        return soup

    language = 'es'

@@ -1,66 +1,66 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
eluniversal.com.mx
'''

from calibre.web.feeds.news import BasicNewsRecipe

class ElUniversal(BasicNewsRecipe):
    title = 'El Universal'
    __author__ = 'Darko Miletic'
    description = 'News from Mexico'
    oldest_article = 1
    max_articles_per_feed = 100
    publisher = 'El Universal'
    category = 'news, politics, Mexico'
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'cp1252'
    remove_javascript = True
    language = 'es'

    html2lrf_options = [
                          '--comment', description
                        , '--category', category
                        , '--publisher', publisher
                        , '--ignore-tables'
                        ]

    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'

    remove_tags = [dict(name='link')]

    feeds = [
              (u'Minuto por Minuto', u'http://www.eluniversal.com.mx/rss/universalmxm.xml' )
             ,(u'Mundo'            , u'http://www.eluniversal.com.mx/rss/mundo.xml'        )
             ,(u'Mexico'           , u'http://www.eluniversal.com.mx/rss/mexico.xml'       )
             ,(u'Estados'          , u'http://www.eluniversal.com.mx/rss/estados.xml'      )
             ,(u'Finanzas'         , u'http://www.eluniversal.com.mx/rss/finanzas.xml'     )
             ,(u'Deportes'         , u'http://www.eluniversal.com.mx/rss/deportes.xml'     )
             ,(u'Espectaculos'     , u'http://www.eluniversal.com.mx/rss/espectaculos.xml' )
             ,(u'Cultura'          , u'http://www.eluniversal.com.mx/rss/cultura.xml'      )
             ,(u'Ciencia'          , u'http://www.eluniversal.com.mx/rss/ciencia.xml'      )
             ,(u'Computacion'      , u'http://www.eluniversal.com.mx/rss/computo.xml'      )
             ,(u'Sociedad'         , u'http://www.eluniversal.com.mx/rss/sociedad.xml'     )
            ]

    def print_version(self, url):
        return url.replace('/notas/','/notas/vi_')

    def preprocess_html(self, soup):
        mtag = '<meta http-equiv="Content-Language" content="es-MX"/><meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
        soup.head.insert(0,mtag)
        for item in soup.findAll(style=True):
            del item['style']
        for item in soup.findAll(font=True):
            del item['font']
        for item in soup.findAll(face=True):
            del item['face']
        for item in soup.findAll(helvetica=True):
            del item['helvetica']
        return soup

@@ -1,62 +1,62 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
'''
elargentino.com
'''
from calibre.web.feeds.news import BasicNewsRecipe

class ElArgentino(BasicNewsRecipe):
    title = 'ElArgentino.com'
    __author__ = 'Darko Miletic'
    description = 'Informacion Libre las 24 horas'
    publisher = 'ElArgentino.com'
    category = 'news, politics, Argentina'
    oldest_article = 2
    max_articles_per_feed = 100
    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf8'
    cover_url = 'http://www.elargentino.com/TemplateWeb/MediosFooter/tapa_elargentino.png'
    language = 'es'

    html2lrf_options = [
                          '--comment', description
                        , '--category', category
                        , '--publisher', publisher
                        ]

    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'

    remove_tags = [
                    dict(name='div', attrs={'id':'noprint'})
                   ,dict(name='div', attrs={'class':'encabezadoImprimir'})
                   ,dict(name='a'  , attrs={'target':'_blank'})
                  ]

    feeds = [
              (u'Portada'     , u'http://www.elargentino.com/Highlights.aspx?Content-Type=text/xml&ChannelDesc=Home')
             ,(u'Pais'        , u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=112&Content-Type=text/xml&ChannelDesc=Pa%C3%ADs')
             ,(u'Economia'    , u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=107&Content-Type=text/xml&ChannelDesc=Econom%C3%ADa')
             ,(u'Mundo'       , u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=113&Content-Type=text/xml&ChannelDesc=Mundo')
             ,(u'Tecnologia'  , u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=118&Content-Type=text/xml&ChannelDesc=Tecnolog%C3%ADa')
             ,(u'Espectaculos', u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=114&Content-Type=text/xml&ChannelDesc=Espect%C3%A1culos')
             ,(u'Deportes'    , u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=106&Content-Type=text/xml&ChannelDesc=Deportes')
             ,(u'Sociedad'    , u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=109&Content-Type=text/xml&ChannelDesc=Sociedad')
             ,(u'Entrevistas' , u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=115&Content-Type=text/xml&ChannelDesc=Entrevistas')
            ]

    def print_version(self, url):
        main, sep, article_part = url.partition('/nota-')
        article_id, rsep, rrest = article_part.partition('-')
        return u'http://www.elargentino.com/Impresion.aspx?Id=' + article_id

    def preprocess_html(self, soup):
        mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">\n<meta http-equiv="Content-Language" content="es-AR"/>\n'
        soup.head.insert(0,mtag)
        for item in soup.findAll(style=True):
            del item['style']
        return soup

@@ -1,72 +1,72 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
'''
cronista.com
'''

from calibre.web.feeds.news import BasicNewsRecipe

class ElCronista(BasicNewsRecipe):
    title = 'El Cronista'
    __author__ = 'Darko Miletic'
    description = 'Noticias de Argentina'
    oldest_article = 2
    language = 'es'

    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'cp1252'

    html2lrf_options = [
                          '--comment' , description
                        , '--category' , 'news, Argentina'
                        , '--publisher' , title
                        ]

    keep_only_tags = [
                       dict(name='table', attrs={'width':'100%'})
                      ,dict(name='h1'   , attrs={'class':'Arialgris16normal'})
                     ]

    remove_tags = [dict(name='a', attrs={'class':'Arialazul12'})]

    feeds = [
              (u'Economia'                , u'http://www.cronista.com/adjuntos/8/rss/Economia_EI.xml'             )
             ,(u'Negocios'                , u'http://www.cronista.com/adjuntos/8/rss/negocios_EI.xml'             )
             ,(u'Ultimo momento'          , u'http://www.cronista.com/adjuntos/8/rss/ultimo_momento.xml'          )
             ,(u'Finanzas y Mercados'     , u'http://www.cronista.com/adjuntos/8/rss/Finanzas_Mercados_EI.xml'    )
             ,(u'Financial Times'         , u'http://www.cronista.com/adjuntos/8/rss/FT_EI.xml'                   )
             ,(u'Opinion edicion impresa' , u'http://www.cronista.com/adjuntos/8/rss/opinion_edicion_impresa.xml' )
             ,(u'Socialmente Responsables', u'http://www.cronista.com/adjuntos/8/rss/Socialmente_Responsables.xml')
             ,(u'Asuntos Legales'         , u'http://www.cronista.com/adjuntos/8/rss/asuntoslegales.xml'          )
             ,(u'IT Business'             , u'http://www.cronista.com/adjuntos/8/rss/itbusiness.xml'              )
             ,(u'Management y RR.HH.'     , u'http://www.cronista.com/adjuntos/8/rss/management.xml'              )
             ,(u'Inversiones Personales'  , u'http://www.cronista.com/adjuntos/8/rss/inversionespersonales.xml'   )
            ]

    def print_version(self, url):
        main, sep, rest = url.partition('.com/notas/')
        article_id, lsep, rrest = rest.partition('-')
        return 'http://www.cronista.com/interior/index.php?p=imprimir_nota&idNota=' + article_id

    def preprocess_html(self, soup):
        mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
        soup.head.insert(0,mtag)
        soup.head.base.extract()
        htext = soup.find('h1',attrs={'class':'Arialgris16normal'})
        htext.name = 'p'
        soup.prettify()
        return soup

    def get_cover_url(self):
        cover_url = None
        index = 'http://www.cronista.com/contenidos/'
        soup = self.index_to_soup(index + 'ee.html')
        link_item = soup.find('a',attrs={'href':"javascript:Close()"})
        if link_item:
            cover_url = index + link_item.img['src']
        return cover_url

@@ -1,61 +1,60 @@
#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
elmundo.es
'''

from calibre.web.feeds.news import BasicNewsRecipe

class ElMundo(BasicNewsRecipe):
    title = 'El Mundo'
    __author__ = 'Darko Miletic'
    description = 'News from Spain'
    publisher = 'El Mundo'
    category = 'news, politics, Spain'
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'iso8859_15'
    cover_url = 'http://estaticos02.cache.el-mundo.net/papel/imagenes/v2.0/logoverde.gif'
    remove_javascript = True

    html2lrf_options = [
                          '--comment', description
                        , '--category', category
                        , '--publisher', publisher
                        ]

    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'

    keep_only_tags = [
                       dict(name='div', attrs={'id':['bloqueprincipal','noticia']})
                      ,dict(name='div', attrs={'class':['contenido_noticia_01']})
                     ]
    remove_tags = [
                    dict(name='div', attrs={'class':['herramientas','publicidad_google']})
                   ,dict(name='div', attrs={'id':'modulo_multimedia'})
                   ,dict(name='ul', attrs={'class':'herramientas'})
                   ,dict(name=['object','link'])
                  ]

    feeds = [
              (u'Portada'         , u'http://rss.elmundo.es/rss/descarga.htm?data2=4' )
             ,(u'Espana'          , u'http://rss.elmundo.es/rss/descarga.htm?data2=8' )
             ,(u'Internacional'   , u'http://rss.elmundo.es/rss/descarga.htm?data2=9' )
             ,(u'Cultura'         , u'http://rss.elmundo.es/rss/descarga.htm?data2=6' )
             ,(u'Ciencia/Ecologia', u'http://rss.elmundo.es/rss/descarga.htm?data2=5' )
             ,(u'Comunicacion'    , u'http://rss.elmundo.es/rss/descarga.htm?data2=26')
             ,(u'Television'      , u'http://rss.elmundo.es/rss/descarga.htm?data2=76')
            ]

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        return soup

    language = 'es'

@@ -1,56 +1,56 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
elperiodico.cat
'''

from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag

class ElPeriodico_cat(BasicNewsRecipe):
    title = 'El Periodico de Catalunya'
    __author__ = 'Darko Miletic'
    description = 'Noticias desde Catalunya'
    publisher = 'elperiodico.cat'
    category = 'news, politics, Spain, Catalunya'
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    delay = 1
    encoding = 'cp1252'
    language = 'ca'

    html2lrf_options = [
                          '--comment' , description
                        , '--category' , category
                        , '--publisher', publisher
                        ]

    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'

    feeds = [(u"Tota l'edició", u'http://www.elperiodico.cat/rss.asp?id=46')]

    keep_only_tags = [dict(name='div', attrs={'id':'noticia'})]

    remove_tags = [
                    dict(name=['object','link','script'])
                   ,dict(name='ul',attrs={'class':'herramientasDeNoticia'})
                   ,dict(name='div', attrs={'id':'inferiores'})
                  ]

    def print_version(self, url):
        return url.replace('/default.asp?','/print.asp?')

    def preprocess_html(self, soup):
        mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
        soup.head.insert(0,mcharset)
        for item in soup.findAll(style=True):
            del item['style']
        return soup

@ -1,56 +1,56 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

__license__   = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
elperiodico.com
'''

from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag

class ElPeriodico_esp(BasicNewsRecipe):
    title                 = 'El Periodico de Catalunya'
    __author__            = 'Darko Miletic'
    description           = 'Noticias desde Catalunya'
    publisher             = 'elperiodico.com'
    category              = 'news, politics, Spain, Catalunya'
    oldest_article        = 2
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    delay                 = 1
    encoding              = 'cp1252'
    language              = 'es'

    html2lrf_options = [
        '--comment'  , description
      , '--category' , category
      , '--publisher', publisher
    ]

    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'

    feeds = [(u"Toda la edición", u'http://www.elperiodico.com/rss.asp?id=46')]

    keep_only_tags = [dict(name='div', attrs={'id':'noticia'})]

    remove_tags = [
        dict(name=['object','link','script'])
       ,dict(name='ul' , attrs={'class':'herramientasDeNoticia'})
       ,dict(name='div', attrs={'id':'inferiores'})
    ]

    def print_version(self, url):
        return url.replace('/default.asp?','/print.asp?')

    def preprocess_html(self, soup):
        mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
        soup.head.insert(0,mcharset)
        for item in soup.findAll(style=True):
            del item['style']
        return soup

@ -1,53 +1,53 @@
#!/usr/bin/env python

__license__   = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
www.tiempo.hn
'''

from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
from calibre.ebooks.BeautifulSoup import Tag

class ElTiempoHn(BasicNewsRecipe):
    title                 = 'El Tiempo - Honduras'
    __author__            = 'Darko Miletic'
    description           = 'Noticias de Honduras y mundo'
    publisher             = 'El Tiempo'
    category              = 'news, politics, Honduras'
    oldest_article        = 2
    max_articles_per_feed = 100
    use_embedded_content  = False
    no_stylesheets        = True
    remove_javascript     = True
    encoding              = 'utf-8'
    language              = 'es'

    lang      = 'es-HN'
    direction = 'ltr'

    html2lrf_options = [
        '--comment'  , description
      , '--category' , category
      , '--publisher', publisher
      , '--ignore-tables'
    ]

    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True\npretty_print=True\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} img {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em}"'

    remove_tags = [dict(name=['form','object','embed','base'])]

    keep_only_tags = [dict(name='td' , attrs={'id':'mainbodycont'})]

    feeds = [(u'Noticias', u'http://www.tiempo.hn/index.php?format=feed&type=rss')]

    def preprocess_html(self, soup):
        soup.html['lang'] = self.lang
        soup.html['dir' ] = self.direction
        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
        mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
        soup.head.insert(0,mlang)
        soup.head.insert(1,mcharset)
        for item in soup.findAll(style=True):
            del item['style']
        return self.adeify_images(soup)

@ -1,32 +1,31 @@
#!/usr/bin/env python

__license__   = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
'''
engadget.com
'''

import string,re
from calibre.web.feeds.news import BasicNewsRecipe

class Engadget(BasicNewsRecipe):
    title       = u'Engadget'
    __author__  = 'Darko Miletic'
    description = 'Tech news'
    language    = 'en'

    oldest_article        = 7
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False

    keep_only_tags = [ dict(name='div', attrs={'class':'post'}) ]
    remove_tags = [
        dict(name='object')
       ,dict(name='div', attrs={'class':'postmeta'})
       ,dict(name='div', attrs={'class':'quigoads'})
    ]

    feeds = [ (u'Posts', u'http://www.engadget.com/rss.xml')]

@ -1,63 +1,63 @@
#!/usr/bin/env python

__license__   = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'

'''
www.esquire.com
'''

from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag

class Esquire(BasicNewsRecipe):
    title                 = 'Esquire'
    __author__            = 'Darko Miletic'
    description           = 'Esquire: Man at His Best'
    publisher             = 'Hearst Communications, Inc.'
    category              = 'magazine, men, women we love, style, the guide, sex, screen'
    oldest_article        = 30
    max_articles_per_feed = 100
    no_stylesheets        = True
    encoding              = 'cp1250'
    use_embedded_content  = False
    language              = 'en'

    lang      = 'en-US'
    cover_url = strftime('http://www.esquire.com/cm/esquire/cover-images/%Y_') + strftime('%m').strip('0') + '.jpg'

    conversion_options = {
        'comment'      : description
      , 'tags'         : category
      , 'publisher'    : publisher
      , 'language'     : lang
      , 'pretty_print' : True
    }

    keep_only_tags = [dict(name='div', attrs={'id':'content'})]

    remove_tags = [dict(name=['object','link','embed','iframe'])]

    feeds = [
        (u'Style'    , u'http://www.esquire.com/style/rss/'   )
       ,(u'Women'    , u'http://www.esquire.com/women/rss/'   )
       ,(u'Features' , u'http://www.esquire.com/features/rss/')
       ,(u'Fiction'  , u'http://www.esquire.com/fiction/rss/' )
       ,(u'Frontpage', u'http://www.esquire.com/rss/'         )
    ]

    def print_version(self, url):
        rest = url.rpartition('?')[0]
        article = rest.rpartition('/')[2]
        return 'http://www.esquire.com/print-this/' + article

    def preprocess_html(self, soup):
        soup.html['xml:lang'] = self.lang
        soup.html['lang'] = self.lang
        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
        soup.head.insert(0,mlang)
        for item in soup.findAll(style=True):
            del item['style']
        return soup

@ -1,58 +1,58 @@
#!/usr/bin/env python

__license__   = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
exiledonline.com
'''

from calibre.web.feeds.news import BasicNewsRecipe

class Exiled(BasicNewsRecipe):
    title                 = 'Exiled Online'
    __author__            = 'Darko Miletic'
    description           = "Mankind's only alternative since 1997 - Formerly known as The eXile"
    publisher             = 'Exiled Online'
    category              = 'news, politics, international'
    oldest_article        = 15
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'utf8'
    remove_javascript     = True
    language              = 'en'

    cover_url = 'http://exiledonline.com/wp-content/themes/exiledonline_theme/images/header-sm.gif'

    html2lrf_options = [
        '--comment'       , description
      , '--base-font-size', '10'
      , '--category'      , category
      , '--publisher'     , publisher
    ]

    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'

    keep_only_tags = [dict(name='div', attrs={'id':'main'})]

    remove_tags = [
        dict(name=['object','link'])
       ,dict(name='div', attrs={'class':'info'})
       ,dict(name='div', attrs={'id':['comments','navig']})
    ]

    feeds = [(u'Articles', u'http://exiledonline.com/feed/')]

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        mtag = '\n<meta http-equiv="Content-Language" content="en"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">\n'
        soup.head.insert(0,mtag)
        return soup

    def get_article_url(self, article):
        raw = article.get('link',  None)
        final = raw + 'all/1/'
        return final

@ -1,59 +1,59 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

__license__   = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
www.expansion.com
'''

from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag

class Expansion(BasicNewsRecipe):
    title                 = 'Diario Expansion'
    __author__            = 'Darko Miletic'
    description           = 'Lider de informacion de mercados, economica y politica'
    publisher             = 'expansion.com'
    category              = 'news, politics, Spain'
    oldest_article        = 2
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    delay                 = 1
    encoding              = 'iso-8859-15'
    language              = 'es'

    direction = 'ltr'

    html2lrf_options = [
        '--comment'  , description
      , '--category' , category
      , '--publisher', publisher
    ]

    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'

    feeds = [
        (u'Ultimas noticias', u'http://rss.expansion.com/rss/descarga.htm?data2=178')
       ,(u'Temas del dia'   , u'http://rss.expansion.com/rss/descarga.htm?data2=178')
    ]

    keep_only_tags = [dict(name='div', attrs={'id':'principal'})]

    remove_tags = [
        dict(name=['object','link','script'])
       ,dict(name='div', attrs={'class':['utilidades','tit_relacionadas']})
    ]

    remove_tags_after = [dict(name='div', attrs={'class':'tit_relacionadas'})]

    def preprocess_html(self, soup):
        soup.html['dir' ] = self.direction
        mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
        soup.head.insert(0,mcharset)
        for item in soup.findAll(style=True):
            del item['style']
        return soup

@ -1,55 +1,55 @@
#!/usr/bin/env python

__license__   = 'GPL v3'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
'''
www.fastcompany.com
'''

from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag

class FastCompany(BasicNewsRecipe):
    title                  = 'Fast Company'
    __author__             = 'Darko Miletic'
    description            = 'Where ideas and people meet'
    publisher              = 'fastcompany.com'
    category               = 'news, technology, gadgets, games'
    oldest_article         = 15
    max_articles_per_feed  = 100
    no_stylesheets         = True
    use_embedded_content   = True
    simultaneous_downloads = 1
    encoding               = 'utf-8'
    lang                   = 'en'
    language               = 'en'

    html2lrf_options = [
        '--comment'  , description
      , '--category' , category
      , '--publisher', publisher
    ]

    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'

    remove_tags = [dict(name=['embed','object']), dict(name='div',attrs={'class':'feedflare'})]

    feeds = [(u'All News', u'http://feeds.feedburner.com/fastcompany/headlines')]

    def get_article_url(self, article):
        return article.get('guid',  None)

    def preprocess_html(self, soup):
        soup.html['xml:lang'] = self.lang
        soup.html['lang'] = self.lang
        mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
        mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
        soup.head.insert(0,mlang)
        soup.head.insert(1,mcharset)
        for item in soup.findAll('a'):
            sp = item['href'].find('http://feedads.g.doubleclick.net/')
            if sp != -1:
                item.extract()
        return self.adeify_images(soup)

@ -1,51 +1,51 @@
__license__   = 'GPL v3'
__copyright__ = '2008-2009, Kovid Goyal <kovid at kovidgoyal.net>, Darko Miletic <darko at gmail.com>'
'''
Profile to download FAZ.net
'''

from calibre.web.feeds.news import BasicNewsRecipe

class FazNet(BasicNewsRecipe):
    title                = 'FAZ NET'
    __author__           = 'Kovid Goyal, Darko Miletic'
    description          = 'Frankfurter Allgemeine Zeitung'
    publisher            = 'FAZ Electronic Media GmbH'
    category             = 'news, politics, Germany'
    use_embedded_content = False
    language             = 'de'

    max_articles_per_feed = 30
    no_stylesheets        = True
    encoding              = 'utf-8'
    remove_javascript     = True

    html2lrf_options = [
        '--comment'  , description
      , '--category' , category
      , '--publisher', publisher
    ]

    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'

    keep_only_tags = [dict(name='div', attrs={'class':'Article'})]

    remove_tags = [
        dict(name=['object','link','embed','base'])
       ,dict(name='div', attrs={'class':['LinkBoxModulSmall','ModulVerlagsInfo']})
    ]

    feeds = [ ('FAZ.NET', 'http://www.faz.net/s/Rub/Tpl~Epartner~SRss_.xml') ]

    def print_version(self, url):
        article, sep, rest = url.partition('?')
        return article.replace('.html', '~Afor~Eprint.html')

    def preprocess_html(self, soup):
        mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>'
        soup.head.insert(0,mtag)
        del soup.body['onload']
        for item in soup.findAll(style=True):
            del item['style']
        return soup

Some files were not shown because too many files have changed in this diff.