mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 10:44:09 -04:00
Sync to trunk
This commit is contained in:
commit
373d224738
@ -19,7 +19,7 @@ import mechanize
|
||||
|
||||
mimetypes.add_type('application/epub+zip', '.epub')
|
||||
mimetypes.add_type('text/x-sony-bbeb+xml', '.lrs')
|
||||
mimetypes.add_type('http://www.w3.org/1999/xhtml', '.xhtml')
|
||||
mimetypes.add_type('application/xhtml+xml', '.xhtml')
|
||||
mimetypes.add_type('image/svg+xml', '.svg')
|
||||
mimetypes.add_type('application/x-sony-bbeb', '.lrf')
|
||||
mimetypes.add_type('application/x-dtbncx+xml', '.ncx')
|
||||
|
@ -2,7 +2,7 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
__appname__ = 'calibre'
|
||||
__version__ = '0.4.134'
|
||||
__version__ = '0.4.135'
|
||||
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
|
||||
'''
|
||||
Various run time constants.
|
||||
|
@ -141,6 +141,7 @@ def set_file_type_metadata(stream, mi, ftype):
|
||||
plugin.set_metadata(stream, mi, ftype.lower().strip())
|
||||
break
|
||||
except:
|
||||
print 'Failed to set metadata for', repr(getattr(mi, 'title', ''))
|
||||
traceback.print_exc()
|
||||
|
||||
|
||||
|
@ -58,9 +58,12 @@ class DeviceScanner(object):
|
||||
return False
|
||||
|
||||
def is_device_connected(self, device):
|
||||
vendor_ids = device.VENDOR_ID if hasattr(device.VENDOR_ID, '__len__') else [device.VENDOR_ID]
|
||||
product_ids = device.PRODUCT_ID if hasattr(device.PRODUCT_ID, '__len__') else [device.PRODUCT_ID]
|
||||
if iswindows:
|
||||
vid, pid = 'vid_%4.4x'%device.VENDOR_ID, 'pid_%4.4x'%device.PRODUCT_ID
|
||||
vidd, pidd = 'vid_%i'%device.VENDOR_ID, 'pid_%i'%device.PRODUCT_ID
|
||||
for vendor_id, product_id in zip(vendor_ids, product_ids):
|
||||
vid, pid = 'vid_%4.4x'%vendor_id, 'pid_%4.4x'%product_id
|
||||
vidd, pidd = 'vid_%i'%vendor_id, 'pid_%i'%product_id
|
||||
for device_id in self.devices:
|
||||
if (vid in device_id or vidd in device_id) and (pid in device_id or pidd in device_id):
|
||||
if self.test_bcd_windows(device_id, getattr(device, 'BCD', None)):
|
||||
@ -68,7 +71,7 @@ class DeviceScanner(object):
|
||||
return True
|
||||
else:
|
||||
for vendor, product, bcdDevice in self.devices:
|
||||
if device.VENDOR_ID == vendor and device.PRODUCT_ID == product:
|
||||
if vendor in vendor_ids and product in product_ids:
|
||||
if self.test_bcd(bcdDevice, getattr(device, 'BCD', None)):
|
||||
if device.can_handle((vendor, product, bcdDevice)):
|
||||
return True
|
||||
|
@ -467,7 +467,7 @@ class Parser(PreProcessor, LoggingInterface):
|
||||
if self.htmlfile.is_binary:
|
||||
raise ValueError('Not a valid HTML file: '+self.htmlfile.path)
|
||||
src = open(self.htmlfile.path, 'rb').read().decode(self.htmlfile.encoding, 'replace').strip()
|
||||
src = src.replace('\x00', '')
|
||||
src = src.replace('\x00', '').replace('\r', ' ')
|
||||
src = self.preprocess(src)
|
||||
# lxml chokes on unicode input when it contains encoding declarations
|
||||
for pat in ENCODING_PATS:
|
||||
|
@ -917,7 +917,8 @@ class HTMLConverter(object, LoggingInterface):
|
||||
blockStyle=self.current_block.blockStyle)
|
||||
|
||||
|
||||
def process_image(self, path, tag_css, width=None, height=None, dropcaps=False):
|
||||
def process_image(self, path, tag_css, width=None, height=None,
|
||||
dropcaps=False, rescale=False):
|
||||
def detect_encoding(im):
|
||||
fmt = im.format
|
||||
if fmt == 'JPG':
|
||||
@ -936,10 +937,6 @@ class HTMLConverter(object, LoggingInterface):
|
||||
return
|
||||
encoding = detect_encoding(im)
|
||||
|
||||
if width == None or height == None:
|
||||
width, height = im.size
|
||||
|
||||
factor = 720./self.profile.dpi
|
||||
|
||||
def scale_image(width, height):
|
||||
if width <= 0:
|
||||
@ -955,8 +952,15 @@ class HTMLConverter(object, LoggingInterface):
|
||||
return pt.name
|
||||
except (IOError, SystemError), err: # PIL chokes on interlaced PNG images as well a some GIF images
|
||||
self.log_warning(_('Unable to process image %s. Error: %s')%(path, err))
|
||||
return None
|
||||
|
||||
if width == None or height == None:
|
||||
width, height = im.size
|
||||
elif rescale and (width < im.size[0] or height < im.size[1]):
|
||||
path = scale_image(width, height)
|
||||
if not path:
|
||||
return
|
||||
|
||||
factor = 720./self.profile.dpi
|
||||
pheight = int(self.current_page.pageStyle.attrs['textheight'])
|
||||
pwidth = int(self.current_page.pageStyle.attrs['textwidth'])
|
||||
|
||||
@ -1518,7 +1522,8 @@ class HTMLConverter(object, LoggingInterface):
|
||||
except:
|
||||
pass
|
||||
dropcaps = tag.has_key('class') and tag['class'] == 'libprs500_dropcaps'
|
||||
self.process_image(path, tag_css, width, height, dropcaps=dropcaps)
|
||||
self.process_image(path, tag_css, width, height,
|
||||
dropcaps=dropcaps, rescale=True)
|
||||
elif not urlparse(tag['src'])[0]:
|
||||
self.log_warn('Could not find image: '+tag['src'])
|
||||
else:
|
||||
|
@ -287,7 +287,7 @@ class MetaInformation(object):
|
||||
ans += [('ISBN', unicode(self.isbn))]
|
||||
ans += [(_('Tags'), u', '.join([unicode(t) for t in self.tags]))]
|
||||
if self.series:
|
||||
ans += [(_('Series'), unicode(self.series))+ ' #%s'%self.format_series_index()]
|
||||
ans += [(_('Series'), unicode(self.series)+ ' #%s'%self.format_series_index())]
|
||||
ans += [(_('Language'), unicode(self.language))]
|
||||
for i, x in enumerate(ans):
|
||||
ans[i] = u'<tr><td><b>%s</b></td><td>%s</td></tr>'%x
|
||||
|
@ -18,7 +18,7 @@ from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
|
||||
from calibre.ebooks.metadata import get_parser, MetaInformation
|
||||
from calibre.ebooks.metadata.opf2 import OPF
|
||||
from calibre.ptempfile import TemporaryDirectory
|
||||
from calibre import CurrentDir, fit_image
|
||||
from calibre import CurrentDir
|
||||
|
||||
class EPubException(Exception):
|
||||
pass
|
||||
|
@ -38,8 +38,13 @@ def cover_from_isbn(isbn, timeout=5.):
|
||||
browser = _browser()
|
||||
_timeout = socket.getdefaulttimeout()
|
||||
socket.setdefaulttimeout(timeout)
|
||||
src = None
|
||||
try:
|
||||
src = browser.open('http://www.librarything.com/isbn/'+isbn).read().decode('utf-8', 'replace')
|
||||
except Exception, err:
|
||||
if isinstance(getattr(err, 'args', [None])[0], socket.timeout):
|
||||
raise LibraryThingError(_('LibraryThing.com timed out. Try again later.'))
|
||||
raise
|
||||
s = BeautifulSoup(src)
|
||||
url = s.find('td', attrs={'class':'left'})
|
||||
if url is None:
|
||||
|
@ -86,9 +86,9 @@ class MetadataUpdater(object):
|
||||
image_base, = unpack('>I', record0[108:112])
|
||||
flags, = unpack('>I', record0[128:132])
|
||||
have_exth = self.have_exth = (flags & 0x40) != 0
|
||||
self.cover_record = self.thumbnail_record = None
|
||||
if not have_exth:
|
||||
return
|
||||
self.cover_record = self.thumbnail_record = None
|
||||
exth_off = unpack('>I', record0[20:24])[0] + 16 + record0.start
|
||||
exth = self.exth = StreamSlicer(stream, exth_off, record0.stop)
|
||||
nitems, = unpack('>I', exth[8:12])
|
||||
@ -143,6 +143,8 @@ class MetadataUpdater(object):
|
||||
exth = ['EXTH', pack('>II', len(exth) + 12, len(recs)), exth, pad]
|
||||
exth = ''.join(exth)
|
||||
title = (mi.title or _('Unknown')).encode(self.codec, 'replace')
|
||||
if getattr(self, 'exth', None) is None:
|
||||
raise MobiError('No existing EXTH record. Cannot update metadata.')
|
||||
title_off = (self.exth.start - self.record0.start) + len(exth)
|
||||
title_len = len(title)
|
||||
trail = len(self.exth) - len(exth) - len(title)
|
||||
@ -151,8 +153,12 @@ class MetadataUpdater(object):
|
||||
self.exth[:] = ''.join([exth, title, '\0' * trail])
|
||||
self.record0[84:92] = pack('>II', title_off, title_len)
|
||||
self.record0[92:96] = iana2mobi(mi.language)
|
||||
if mi.cover_data[1]:
|
||||
data = mi.cover_data[1]
|
||||
if mi.cover_data[1] or mi.cover:
|
||||
try:
|
||||
data = mi.cover_data[1] if mi.cover_data[1] else open(mi.cover, 'rb').read()
|
||||
except:
|
||||
pass
|
||||
else:
|
||||
if self.cover_record is not None:
|
||||
size = len(self.cover_record)
|
||||
cover = rescale_image(data, size)
|
||||
|
@ -618,7 +618,7 @@ class OPF(object):
|
||||
def fset(self, val):
|
||||
remove = list(self.authors_path(self.metadata))
|
||||
for elem in remove:
|
||||
self.metadata.remove(elem)
|
||||
elem.getparent().remove(elem)
|
||||
for author in val:
|
||||
attrib = {'{%s}role'%self.NAMESPACES['opf']: 'aut'}
|
||||
elem = self.create_metadata_element('creator', attrib=attrib)
|
||||
|
@ -306,13 +306,15 @@ IANA_MOBI = \
|
||||
'zu': {None: (53, 0)}}
|
||||
|
||||
def iana2mobi(icode):
|
||||
langdict, subtags = IANA_MOBI[None], []
|
||||
if icode:
|
||||
subtags = list(icode.split('-'))
|
||||
langdict = IANA_MOBI[None]
|
||||
while len(subtags) > 0:
|
||||
lang = subtags.pop(0).lower()
|
||||
if lang in IANA_MOBI:
|
||||
langdict = IANA_MOBI[lang]
|
||||
break
|
||||
|
||||
mcode = langdict[None]
|
||||
while len(subtags) > 0:
|
||||
subtag = subtags.pop(0)
|
||||
|
@ -17,6 +17,8 @@ import types
|
||||
import re
|
||||
import copy
|
||||
from itertools import izip
|
||||
from weakref import WeakKeyDictionary
|
||||
from xml.dom import SyntaxErr as CSSSyntaxError
|
||||
import cssutils
|
||||
from cssutils.css import CSSStyleRule, CSSPageRule, CSSStyleDeclaration, \
|
||||
CSSValueList, cssproperties
|
||||
@ -106,7 +108,7 @@ class CSSSelector(etree.XPath):
|
||||
|
||||
|
||||
class Stylizer(object):
|
||||
STYLESHEETS = {}
|
||||
STYLESHEETS = WeakKeyDictionary()
|
||||
|
||||
def __init__(self, tree, path, oeb, profile=PROFILES['PRS505']):
|
||||
self.oeb = oeb
|
||||
@ -131,18 +133,19 @@ class Stylizer(object):
|
||||
and elem.get('type', CSS_MIME) in OEB_STYLES:
|
||||
href = urlnormalize(elem.attrib['href'])
|
||||
path = item.abshref(href)
|
||||
if path not in oeb.manifest.hrefs:
|
||||
sitem = oeb.manifest.hrefs.get(path, None)
|
||||
if sitem is None:
|
||||
self.logger.warn(
|
||||
'Stylesheet %r referenced by file %r not in manifest' %
|
||||
(path, item.href))
|
||||
continue
|
||||
if path in self.STYLESHEETS:
|
||||
stylesheet = self.STYLESHEETS[path]
|
||||
if sitem in self.STYLESHEETS:
|
||||
stylesheet = self.STYLESHEETS[sitem]
|
||||
else:
|
||||
data = self._fetch_css_file(path)[1]
|
||||
stylesheet = parser.parseString(data, href=path)
|
||||
stylesheet.namespaces['h'] = XHTML_NS
|
||||
self.STYLESHEETS[path] = stylesheet
|
||||
self.STYLESHEETS[sitem] = stylesheet
|
||||
stylesheets.append(stylesheet)
|
||||
rules = []
|
||||
index = 0
|
||||
@ -291,10 +294,14 @@ class Style(object):
|
||||
|
||||
def _apply_style_attr(self):
|
||||
attrib = self._element.attrib
|
||||
if 'style' in attrib:
|
||||
if 'style' not in attrib:
|
||||
return
|
||||
css = attrib['style'].split(';')
|
||||
css = filter(None, map(lambda x: x.strip(), css))
|
||||
css = filter(None, (x.strip() for x in css))
|
||||
try:
|
||||
style = CSSStyleDeclaration('; '.join(css))
|
||||
except CSSSyntaxError:
|
||||
return
|
||||
self._style.update(self._stylizer.flatten_style(style))
|
||||
|
||||
def _has_parent(self):
|
||||
|
@ -105,36 +105,6 @@
|
||||
<string>Book Cover</string>
|
||||
</property>
|
||||
<layout class="QGridLayout" name="_2" >
|
||||
<item row="0" column="0" >
|
||||
<layout class="QHBoxLayout" name="_3" >
|
||||
<item>
|
||||
<widget class="ImageView" name="cover" >
|
||||
<property name="text" >
|
||||
<string/>
|
||||
</property>
|
||||
<property name="pixmap" >
|
||||
<pixmap resource="../images.qrc" >:/images/book.svg</pixmap>
|
||||
</property>
|
||||
<property name="scaledContents" >
|
||||
<bool>true</bool>
|
||||
</property>
|
||||
<property name="alignment" >
|
||||
<set>Qt::AlignCenter</set>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
</item>
|
||||
<item row="2" column="0" >
|
||||
<widget class="QCheckBox" name="opt_prefer_metadata_cover" >
|
||||
<property name="text" >
|
||||
<string>Use cover from &source file</string>
|
||||
</property>
|
||||
<property name="checked" >
|
||||
<bool>true</bool>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="1" column="0" >
|
||||
<layout class="QVBoxLayout" name="_4" >
|
||||
<property name="spacing" >
|
||||
@ -186,6 +156,36 @@
|
||||
</item>
|
||||
</layout>
|
||||
</item>
|
||||
<item row="2" column="0" >
|
||||
<widget class="QCheckBox" name="opt_prefer_metadata_cover" >
|
||||
<property name="text" >
|
||||
<string>Use cover from &source file</string>
|
||||
</property>
|
||||
<property name="checked" >
|
||||
<bool>true</bool>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="0" column="0" >
|
||||
<layout class="QHBoxLayout" name="_3" >
|
||||
<item>
|
||||
<widget class="ImageView" name="cover" >
|
||||
<property name="text" >
|
||||
<string/>
|
||||
</property>
|
||||
<property name="pixmap" >
|
||||
<pixmap resource="../images.qrc" >:/images/book.svg</pixmap>
|
||||
</property>
|
||||
<property name="scaledContents" >
|
||||
<bool>true</bool>
|
||||
</property>
|
||||
<property name="alignment" >
|
||||
<set>Qt::AlignCenter</set>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
</item>
|
||||
</layout>
|
||||
<zorder>opt_prefer_metadata_cover</zorder>
|
||||
<zorder></zorder>
|
||||
@ -507,6 +507,13 @@
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="9" column="0" >
|
||||
<widget class="QCheckBox" name="opt_remove_first_image" >
|
||||
<property name="text" >
|
||||
<string>Remove &first image from source file</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
</item>
|
||||
<item>
|
||||
|
BIN
src/calibre/gui2/images/news/starbulletin.png
Normal file
BIN
src/calibre/gui2/images/news/starbulletin.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 983 B |
@ -18,6 +18,7 @@ from calibre.library.database2 import FIELD_MAP
|
||||
from calibre.gui2 import NONE, TableView, qstring_to_unicode, config, \
|
||||
error_dialog
|
||||
from calibre.utils.search_query_parser import SearchQueryParser
|
||||
from calibre.ebooks.metadata.meta import set_metadata as _set_metadata
|
||||
|
||||
class LibraryDelegate(QItemDelegate):
|
||||
COLOR = QColor("blue")
|
||||
@ -423,7 +424,7 @@ class BooksModel(QAbstractTableModel):
|
||||
|
||||
|
||||
|
||||
def get_preferred_formats(self, rows, formats, paths=False):
|
||||
def get_preferred_formats(self, rows, formats, paths=False, set_metadata=False):
|
||||
ans = []
|
||||
for row in (row.row() for row in rows):
|
||||
format = None
|
||||
@ -441,6 +442,9 @@ class BooksModel(QAbstractTableModel):
|
||||
pt = PersistentTemporaryFile(suffix='.'+format)
|
||||
pt.write(self.db.format(row, format))
|
||||
pt.flush()
|
||||
if set_metadata:
|
||||
_set_metadata(pt, self.db.get_metadata(row, get_cover=True),
|
||||
format)
|
||||
pt.close() if paths else pt.seek(0)
|
||||
ans.append(pt)
|
||||
else:
|
||||
|
@ -701,7 +701,7 @@ class Main(MainWindow, Ui_MainWindow):
|
||||
progress.show()
|
||||
try:
|
||||
for c, book in enumerate(paths):
|
||||
progress.set_value(c)
|
||||
progress.set_value(c+1)
|
||||
if progress.canceled:
|
||||
return
|
||||
format = os.path.splitext(book)[1]
|
||||
@ -722,9 +722,11 @@ class Main(MainWindow, Ui_MainWindow):
|
||||
'cover':self.default_thumbnail, 'tags':[]})
|
||||
title = mi.title if isinstance(mi.title, unicode) else mi.title.decode(preferred_encoding, 'replace')
|
||||
progress.set_msg(_('Read metadata from ')+title)
|
||||
QApplication.processEvents()
|
||||
|
||||
if not to_device:
|
||||
progress.set_msg(_('Adding books to database...'))
|
||||
QApplication.processEvents()
|
||||
model = self.library_view.model()
|
||||
|
||||
paths = list(paths)
|
||||
@ -741,6 +743,7 @@ class Main(MainWindow, Ui_MainWindow):
|
||||
else:
|
||||
self.upload_books(paths, list(map(sanitize_file_name, names)), infos, on_card=on_card)
|
||||
finally:
|
||||
QApplication.processEvents()
|
||||
progress.hide()
|
||||
|
||||
def upload_books(self, files, names, metadata, on_card=False, memory=None):
|
||||
@ -929,7 +932,8 @@ class Main(MainWindow, Ui_MainWindow):
|
||||
mi['cover'] = self.cover_to_thumbnail(cdata)
|
||||
metadata = iter(metadata)
|
||||
_files = self.library_view.model().get_preferred_formats(rows,
|
||||
self.device_manager.device_class.FORMATS, paths=True)
|
||||
self.device_manager.device_class.FORMATS,
|
||||
paths=True, set_metadata=True)
|
||||
files = [getattr(f, 'name', None) for f in _files]
|
||||
bad, good, gf, names, remove_ids = [], [], [], [], []
|
||||
for f in files:
|
||||
@ -1223,6 +1227,8 @@ class Main(MainWindow, Ui_MainWindow):
|
||||
format = 'LRF'
|
||||
if 'EPUB' in formats:
|
||||
format = 'EPUB'
|
||||
if 'MOBI' in formats:
|
||||
format = 'MOBI'
|
||||
if not formats:
|
||||
d = error_dialog(self, _('Cannot view'),
|
||||
_('%s has no available formats.')%(title,))
|
||||
|
@ -16,6 +16,7 @@ from calibre.utils.config import Config, StringConfig
|
||||
from calibre.gui2.viewer.config_ui import Ui_Dialog
|
||||
from calibre.gui2.viewer.js import bookmarks, referencing
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
from calibre.constants import iswindows
|
||||
|
||||
def load_builtin_fonts():
|
||||
from calibre.ebooks.lrf.fonts.liberation import LiberationMono_BoldItalic
|
||||
@ -57,9 +58,12 @@ def config(defaults=None):
|
||||
help=_('Set the user CSS stylesheet. This can be used to customize the look of all books.'))
|
||||
|
||||
fonts = c.add_group('FONTS', _('Font options'))
|
||||
fonts('serif_family', default='Liberation Serif', help=_('The serif font family'))
|
||||
fonts('sans_family', default='Liberation Sans', help=_('The sans-serif font family'))
|
||||
fonts('mono_family', default='Liberation Mono', help=_('The monospaced font family'))
|
||||
fonts('serif_family', default='Times New Roman' if iswindows else 'Liberation Serif',
|
||||
help=_('The serif font family'))
|
||||
fonts('sans_family', default='Verdana' if iswindows else 'Liberation Sans',
|
||||
help=_('The sans-serif font family'))
|
||||
fonts('mono_family', default='Courier New' if iswindows else 'Liberation Mono',
|
||||
help=_('The monospaced font family'))
|
||||
fonts('default_font_size', default=20, help=_('The standard font size in px'))
|
||||
fonts('mono_font_size', default=16, help=_('The monospaced font size in px'))
|
||||
fonts('standard_font', default='serif', help=_('The standard font type'))
|
||||
|
@ -4,12 +4,11 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
Backend that implements storage of ebooks in an sqlite database.
|
||||
'''
|
||||
import sqlite3 as sqlite
|
||||
import datetime, re, os, cPickle, traceback, sre_constants
|
||||
import datetime, re, os, cPickle, sre_constants
|
||||
from zlib import compress, decompress
|
||||
|
||||
from calibre import sanitize_file_name
|
||||
from calibre.ebooks.metadata.meta import set_metadata, metadata_from_formats
|
||||
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||
from calibre.ebooks.metadata import MetaInformation
|
||||
from calibre.ebooks import BOOK_EXTENSIONS
|
||||
from calibre.web.feeds.recipes import migrate_automatic_profile_to_automatic_recipe
|
||||
@ -1389,76 +1388,6 @@ ALTER TABLE books ADD COLUMN isbn TEXT DEFAULT "" COLLATE NOCASE;
|
||||
def all_ids(self):
|
||||
return [i[0] for i in self.conn.get('SELECT id FROM books')]
|
||||
|
||||
def export_to_dir(self, dir, indices, byauthor=False, single_dir=False,
|
||||
index_is_id=False, callback=None):
|
||||
if not os.path.exists(dir):
|
||||
raise IOError('Target directory does not exist: '+dir)
|
||||
by_author = {}
|
||||
count = 0
|
||||
for index in indices:
|
||||
id = index if index_is_id else self.id(index)
|
||||
au = self.conn.get('SELECT author_sort FROM books WHERE id=?',
|
||||
(id,), all=False)
|
||||
if not au:
|
||||
au = self.authors(index, index_is_id=index_is_id)
|
||||
if not au:
|
||||
au = _('Unknown')
|
||||
au = au.split(',')[0]
|
||||
if not by_author.has_key(au):
|
||||
by_author[au] = []
|
||||
by_author[au].append(index)
|
||||
for au in by_author.keys():
|
||||
apath = os.path.join(dir, sanitize_file_name(au))
|
||||
if not single_dir and not os.path.exists(apath):
|
||||
os.mkdir(apath)
|
||||
for idx in by_author[au]:
|
||||
title = re.sub(r'\s', ' ', self.title(idx, index_is_id=index_is_id))
|
||||
tpath = os.path.join(apath, sanitize_file_name(title))
|
||||
id = idx if index_is_id else self.id(idx)
|
||||
id = str(id)
|
||||
if not single_dir and not os.path.exists(tpath):
|
||||
os.mkdir(tpath)
|
||||
|
||||
name = au + ' - ' + title if byauthor else title + ' - ' + au
|
||||
name += '_'+id
|
||||
base = dir if single_dir else tpath
|
||||
mi = self.get_metadata(idx, index_is_id=index_is_id)
|
||||
cover = self.cover(idx, index_is_id=index_is_id)
|
||||
if cover is not None:
|
||||
cname = sanitize_file_name(name) + '.jpg'
|
||||
cpath = os.path.join(base, cname)
|
||||
open(cpath, 'wb').write(cover)
|
||||
mi.cover = cname
|
||||
f = open(os.path.join(base, sanitize_file_name(name)+'.opf'), 'wb')
|
||||
if not mi.authors:
|
||||
mi.authors = [_('Unknown')]
|
||||
opf = OPFCreator(base, mi)
|
||||
opf.render(f)
|
||||
f.close()
|
||||
|
||||
fmts = self.formats(idx, index_is_id=index_is_id)
|
||||
if not fmts:
|
||||
fmts = ''
|
||||
for fmt in fmts.split(','):
|
||||
data = self.format(idx, fmt, index_is_id=index_is_id)
|
||||
if not data:
|
||||
continue
|
||||
fname = name +'.'+fmt.lower()
|
||||
fname = sanitize_file_name(fname)
|
||||
f = open(os.path.join(base, fname), 'w+b')
|
||||
f.write(data)
|
||||
f.flush()
|
||||
f.seek(0)
|
||||
try:
|
||||
set_metadata(f, mi, fmt.lower())
|
||||
except:
|
||||
print 'Error setting metadata for book:', mi.title
|
||||
traceback.print_exc()
|
||||
f.close()
|
||||
count += 1
|
||||
if callable(callback):
|
||||
if not callback(count, mi.title):
|
||||
return
|
||||
|
||||
|
||||
|
||||
@ -1573,42 +1502,6 @@ ALTER TABLE books ADD COLUMN isbn TEXT DEFAULT "" COLLATE NOCASE;
|
||||
|
||||
return duplicates
|
||||
|
||||
def export_single_format_to_dir(self, dir, indices, format,
|
||||
index_is_id=False, callback=None):
|
||||
dir = os.path.abspath(dir)
|
||||
if not index_is_id:
|
||||
indices = map(self.id, indices)
|
||||
failures = []
|
||||
for count, id in enumerate(indices):
|
||||
try:
|
||||
data = self.format(id, format, index_is_id=True)
|
||||
if not data:
|
||||
failures.append((id, self.title(id, index_is_id=True)))
|
||||
continue
|
||||
except:
|
||||
failures.append((id, self.title(id, index_is_id=True)))
|
||||
continue
|
||||
title = self.title(id, index_is_id=True)
|
||||
au = self.authors(id, index_is_id=True)
|
||||
if not au:
|
||||
au = _('Unknown')
|
||||
fname = '%s - %s.%s'%(title, au, format.lower())
|
||||
fname = sanitize_file_name(fname)
|
||||
if not os.path.exists(dir):
|
||||
os.makedirs(dir)
|
||||
f = open(os.path.join(dir, fname), 'w+b')
|
||||
f.write(data)
|
||||
f.seek(0)
|
||||
try:
|
||||
set_metadata(f, self.get_metadata(id, index_is_id=True), stream_type=format.lower())
|
||||
except:
|
||||
pass
|
||||
f.close()
|
||||
if callable(callback):
|
||||
if not callback(count, title):
|
||||
break
|
||||
return failures
|
||||
|
||||
|
||||
|
||||
class SearchToken(object):
|
||||
|
@ -19,8 +19,9 @@ from calibre.library import title_sort
|
||||
from calibre.library.database import LibraryDatabase
|
||||
from calibre.library.sqlite import connect, IntegrityError
|
||||
from calibre.utils.search_query_parser import SearchQueryParser
|
||||
from calibre.ebooks.metadata import string_to_authors, authors_to_string
|
||||
from calibre.ebooks.metadata.meta import get_metadata
|
||||
from calibre.ebooks.metadata import string_to_authors, authors_to_string, MetaInformation
|
||||
from calibre.ebooks.metadata.meta import get_metadata, set_metadata
|
||||
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||
from calibre.constants import preferred_encoding, iswindows, isosx, filesystem_encoding
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
from calibre.customize.ui import run_plugins_on_import
|
||||
@ -556,7 +557,8 @@ class LibraryDatabase2(LibraryDatabase):
|
||||
traceback.print_exc()
|
||||
continue
|
||||
|
||||
def cover(self, index, index_is_id=False, as_file=False, as_image=False):
|
||||
def cover(self, index, index_is_id=False, as_file=False, as_image=False,
|
||||
as_path=False):
|
||||
'''
|
||||
Return the cover image as a bytestring (in JPEG format) or None.
|
||||
|
||||
@ -566,6 +568,8 @@ class LibraryDatabase2(LibraryDatabase):
|
||||
id = index if index_is_id else self.id(index)
|
||||
path = os.path.join(self.library_path, self.path(id, index_is_id=True), 'cover.jpg')
|
||||
if os.access(path, os.R_OK):
|
||||
if as_path:
|
||||
return path
|
||||
f = open(path, 'rb')
|
||||
if as_image:
|
||||
img = QImage()
|
||||
@ -573,6 +577,30 @@ class LibraryDatabase2(LibraryDatabase):
|
||||
return img
|
||||
return f if as_file else f.read()
|
||||
|
||||
def get_metadata(self, idx, index_is_id=False, get_cover=False):
    '''
    Collect the database fields of one book into a L{MetaInformation} object.

    :param idx: Identifies the book; it is handed to every field accessor
                together with `index_is_id`.
    :param index_is_id: If True `idx` is used directly as the book id,
                        otherwise the id is obtained from `self.id(idx)`.
    :param get_cover: If True, `mi.cover` is set to the value returned by
                      `self.cover(id, index_is_id=True, as_path=True)`.
    :return: The populated L{MetaInformation} object.
    '''
    lookup = {'index_is_id': index_is_id}

    authors = self.authors(idx, **lookup)
    if authors:
        # The stored value is split on commas; any '|' left inside a name
        # is then turned into ','.
        authors = [name.strip().replace('|', ',')
                   for name in authors.split(',')]

    mi = MetaInformation(self.title(idx, **lookup), authors)
    for field in ('author_sort', 'comments', 'publisher'):
        setattr(mi, field, getattr(self, field)(idx, **lookup))

    raw_tags = self.tags(idx, **lookup)
    if raw_tags:
        mi.tags = [tag.strip() for tag in raw_tags.split(',')]

    mi.series = self.series(idx, **lookup)
    if mi.series:
        # The series index is only looked up for books that are in a series.
        mi.series_index = self.series_index(idx, **lookup)

    for field in ('rating', 'isbn'):
        setattr(mi, field, getattr(self, field)(idx, **lookup))

    if index_is_id:
        book_id = idx
    else:
        book_id = self.id(idx)
    mi.application_id = book_id

    if get_cover:
        mi.cover = self.cover(book_id, index_is_id=True, as_path=True)
    return mi
|
||||
|
||||
def has_book(self, mi):
|
||||
title = mi.title
|
||||
if title:
|
||||
@ -1323,4 +1351,106 @@ books_series_link feeds
|
||||
progress.reset()
|
||||
return len(books)
|
||||
|
||||
def export_to_dir(self, dir, indices, byauthor=False, single_dir=False,
                  index_is_id=False, callback=None):
    '''
    Export books to the directory `dir`: for every book an OPF metadata
    file plus one file per stored format, with the metadata written into
    each format file on a best-effort basis.

    :param dir: Target directory. It must already exist.
    :param indices: Books to export (ids if `index_is_id` is True).
    :param byauthor: If True files are named "author - title", otherwise
                     "title - author". The book id is always appended.
    :param single_dir: If True all files are written directly into `dir`,
                       otherwise into `dir/<author>/<title>/`.
    :param index_is_id: Passed through to all database accessors.
    :param callback: Optional callable `callback(count, title)`. The export
                     stops as soon as it returns a false value.
    :raise IOError: If `dir` does not exist.
    '''
    if not os.path.exists(dir):
        raise IOError('Target directory does not exist: '+dir)

    # Group the requested books by the first author (sort) name.
    by_author = {}
    for index in indices:
        book_id = index if index_is_id else self.id(index)
        au = self.conn.get('SELECT author_sort FROM books WHERE id=?',
                           (book_id,), all=False)
        if not au:
            au = self.authors(index, index_is_id=index_is_id)
        if not au:
            au = _('Unknown')
        au = au.split(',')[0]
        by_author.setdefault(au, []).append(index)

    count = 0
    for au, book_indices in by_author.items():
        apath = os.path.join(dir, sanitize_file_name(au))
        if not single_dir and not os.path.exists(apath):
            os.mkdir(apath)
        for idx in book_indices:
            title = re.sub(r'\s', ' ', self.title(idx, index_is_id=index_is_id))
            tpath = os.path.join(apath, sanitize_file_name(title))
            book_id = str(idx if index_is_id else self.id(idx))
            if not single_dir and not os.path.exists(tpath):
                os.mkdir(tpath)

            name = au + ' - ' + title if byauthor else title + ' - ' + au
            name += '_'+book_id
            base = dir if single_dir else tpath

            mi = self.get_metadata(idx, index_is_id=index_is_id, get_cover=True)
            if not mi.authors:
                mi.authors = [_('Unknown')]
            f = open(os.path.join(base, sanitize_file_name(name)+'.opf'), 'wb')
            try:
                OPFCreator(base, mi).render(f)
            finally:
                # Close the OPF file even if rendering fails.
                f.close()

            fmts = self.formats(idx, index_is_id=index_is_id)
            # A book without formats has nothing more to write; do not
            # query the database for an empty format name.
            for fmt in (fmts.split(',') if fmts else ()):
                data = self.format(idx, fmt, index_is_id=index_is_id)
                if not data:
                    continue
                fname = sanitize_file_name(name + '.' + fmt.lower())
                f = open(os.path.join(base, fname), 'w+b')
                try:
                    f.write(data)
                    f.flush()
                    f.seek(0)
                    try:
                        set_metadata(f, mi, fmt.lower())
                    except Exception:
                        # Best effort: the book data is already on disk, a
                        # failure to embed metadata must not abort the export.
                        pass
                finally:
                    f.close()

            # NOTE(review): progress is counted once per book here, not
            # once per written format file -- confirm this matches what
            # the callers' progress callbacks expect.
            count += 1
            if callable(callback):
                if not callback(count, mi.title):
                    return
|
||||
|
||||
def export_single_format_to_dir(self, dir, indices, format,
                                index_is_id=False, callback=None):
    '''
    Export one format of each book into `dir` as "title - authors.ext",
    writing the book's metadata into the exported file on a best-effort
    basis.

    :param dir: Target directory. It is created when the first file is
                about to be written, if it does not exist yet.
    :param indices: Books to export (ids if `index_is_id` is True).
    :param format: The format to export; its lower-cased form is used as
                   the file extension.
    :param index_is_id: If False `indices` are first converted to ids via
                        `self.id`.
    :param callback: Optional callable `callback(count, title)` invoked
                     after each exported book with its zero-based position
                     in `indices`. The export stops as soon as it returns
                     a false value.
    :return: A list of `(id, title)` tuples for the books whose data in
             `format` was missing or could not be read.
    '''
    dir = os.path.abspath(dir)
    if not index_is_id:
        indices = [self.id(index) for index in indices]
    failures = []
    for count, book_id in enumerate(indices):
        try:
            data = self.format(book_id, format, index_is_id=True)
        except Exception:
            data = None
        if not data:
            failures.append((book_id, self.title(book_id, index_is_id=True)))
            continue

        title = self.title(book_id, index_is_id=True)
        au = self.authors(book_id, index_is_id=True)
        if not au:
            au = _('Unknown')
        fname = sanitize_file_name('%s - %s.%s'%(title, au, format.lower()))
        if not os.path.exists(dir):
            os.makedirs(dir)

        f = open(os.path.join(dir, fname), 'w+b')
        try:
            f.write(data)
            f.seek(0)
            try:
                set_metadata(f, self.get_metadata(book_id, index_is_id=True, get_cover=True),
                             stream_type=format.lower())
            except Exception:
                # Best effort: the book data is already on disk, a failure
                # to embed metadata must not abort the export.
                pass
        finally:
            # Close the file even if writing the data fails.
            f.close()

        if callable(callback):
            if not callback(count, title):
                break
    return failures
|
||||
|
||||
|
||||
|
||||
|
@ -236,7 +236,9 @@ Donors per day: %(dpd).2f
|
||||
ml = mdates.MonthLocator() # every month
|
||||
fig = plt.figure(1, (8, 4), 96)#, facecolor, edgecolor, frameon, FigureClass)
|
||||
ax = fig.add_subplot(111)
|
||||
average = sum(y)/len(y)
|
||||
ax.bar(x, y, align='center', width=20, color='g')
|
||||
ax.hlines([average], x[0], x[-1])
|
||||
ax.xaxis.set_major_locator(ml)
|
||||
ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %y'))
|
||||
ax.set_xlim(_months[0].min-timedelta(days=15), _months[-1].min+timedelta(days=15))
|
||||
|
@ -30,7 +30,7 @@ class Distribution(object):
|
||||
('libusb', '0.1.12', None, None, None),
|
||||
('Qt', '4.4.0', 'qt', 'libqt4-core libqt4-gui', 'qt4'),
|
||||
('PyQt', '4.4.2', 'PyQt4', 'python-qt4', 'PyQt4'),
|
||||
('mechanize for python', '0.1.8', 'dev-python/mechanize', 'python-mechanize', 'python-mechanize'),
|
||||
('mechanize for python', '0.1.11', 'dev-python/mechanize', 'python-mechanize', 'python-mechanize'),
|
||||
('ImageMagick', '6.3.5', 'imagemagick', 'imagemagick', 'ImageMagick'),
|
||||
('xdg-utils', '1.0.2', 'xdg-utils', 'xdg-utils', 'xdg-utils'),
|
||||
('dbus-python', '0.82.2', 'dbus-python', 'python-dbus', 'dbus-python'),
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -91,7 +91,7 @@ class BasicNewsRecipe(object, LoggingInterface):
|
||||
#: If True stylesheets are not downloaded and processed
|
||||
no_stylesheets = False
|
||||
|
||||
#: Convenient flag to strip all javascripts tags from the downloaded HTML
|
||||
#: Convenient flag to strip all javascript tags from the downloaded HTML
|
||||
remove_javascript = True
|
||||
|
||||
#: If True the GUI will ask the user for a username and password
|
||||
|
@ -26,7 +26,8 @@ recipe_modules = ['recipe_' + r for r in (
|
||||
'laprensa', 'amspec', 'freakonomics', 'criticadigital', 'elcronista',
|
||||
'shacknews', 'teleread', 'granma', 'juventudrebelde', 'juventudrebelde_english',
|
||||
'la_tercera', 'el_mercurio_chile', 'la_cuarta', 'lanacion_chile', 'la_segunda',
|
||||
'jb_online', 'estadao', 'o_globo', 'vijesti', 'elmundo', 'the_oz', 'exiled',
|
||||
'jb_online', 'estadao', 'o_globo', 'vijesti', 'elmundo', 'the_oz',
|
||||
'honoluluadvertiser', 'starbulletin', 'exiled', 'indy_star', 'dna',
|
||||
)]
|
||||
|
||||
import re, imp, inspect, time, os
|
||||
|
@ -1,32 +1,39 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
ambito.com
|
||||
'''
|
||||
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Ambito(BasicNewsRecipe):
|
||||
title = 'Ambito.com'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'Informacion Libre las 24 horas'
|
||||
publisher = 'Ambito.com'
|
||||
category = 'news, politics, Argentina'
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
encoding = 'iso--8859-1'
|
||||
language = _('Spanish')
|
||||
encoding = 'iso-8859-1'
|
||||
cover_url = 'http://www.ambito.com/img/logo_.jpg'
|
||||
remove_javascript = True
|
||||
use_embedded_content = False
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
, '--category' , 'news, Argentina'
|
||||
, '--publisher' , title
|
||||
, '--category', category
|
||||
, '--publisher', publisher
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'align':'justify'})]
|
||||
|
||||
remove_tags = [dict(name=['object','link'])]
|
||||
|
||||
feeds = [
|
||||
(u'Principales Noticias', u'http://www.ambito.com/rss/noticiasp.asp' )
|
||||
,(u'Economia' , u'http://www.ambito.com/rss/noticias.asp?S=Econom%EDa' )
|
||||
@ -43,3 +50,12 @@ class Ambito(BasicNewsRecipe):
|
||||
|
||||
def print_version(self, url):
|
||||
return url.replace('http://www.ambito.com/noticia.asp?','http://www.ambito.com/noticias/imprimir.asp?')
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
mtag = '<meta http-equiv="Content-Language" content="es-AR"/>'
|
||||
soup.head.insert(0,mtag)
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
return soup
|
||||
|
||||
language = _('Spanish')
|
@ -7,25 +7,33 @@ b92.net
|
||||
'''
|
||||
|
||||
import re
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class B92(BasicNewsRecipe):
|
||||
title = u'B92'
|
||||
title = 'B92'
|
||||
__author__ = 'Darko Miletic'
|
||||
language = _('Serbian')
|
||||
description = 'Dnevne vesti iz Srbije i sveta'
|
||||
oldest_article = 7
|
||||
oldest_article = 2
|
||||
publisher = 'B92.net'
|
||||
category = 'news, politics, Serbia'
|
||||
max_articles_per_feed = 100
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
cover_url = 'http://static.b92.net/images/fp/logo.gif'
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
|
||||
|
||||
keep_only_tags = [ dict(name='div', attrs={'class':'sama_vest'}) ]
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
, '--base-font-size', '10'
|
||||
, '--category', 'news, Serbia'
|
||||
, '--publisher', 'B92'
|
||||
, '--category', category
|
||||
, '--publisher', publisher
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
|
||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||
feeds = [
|
||||
(u'Vesti', u'http://www.b92.net/info/rss/vesti.xml')
|
||||
@ -44,3 +52,16 @@ class B92(BasicNewsRecipe):
|
||||
if biz:
|
||||
nurl = 'http://www.b92.net/mobilni/biz/index.php?nav_id=' + article_id
|
||||
return nurl
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
soup.html['xml:lang'] = 'sr-Latn'
|
||||
soup.html['lang'] = 'sr-Latn'
|
||||
mtag = '<meta http-equiv="Content-Language" content="sr-Latn"/>'
|
||||
soup.head.insert(0,mtag)
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
for item in soup.findAll(name='img',align=True):
|
||||
del item['align']
|
||||
item.insert(0,'<br /><br />')
|
||||
return soup
|
||||
language = _('Serbian')
|
@ -5,31 +5,49 @@ __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
blic.rs
|
||||
'''
|
||||
import string,re
|
||||
|
||||
import re
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Blic(BasicNewsRecipe):
|
||||
title = u'Blic'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'Blic.rs online verzija najtiraznije novine u Srbiji donosi najnovije vesti iz Srbije i sveta, komentare, politicke analize, poslovne i ekonomske vesti, vesti iz regiona, intervjue, informacije iz kulture, reportaze, pokriva sve sportske dogadjaje, detaljan tv program, nagradne igre, zabavu, fenomenalni Blic strip, dnevni horoskop, arhivu svih dogadjaja'
|
||||
oldest_article = 7
|
||||
__author__ = u'Darko Miletic'
|
||||
description = u'Blic.co.yu online verzija najtiraznije novine u Srbiji donosi najnovije vesti iz Srbije i sveta, komentare, politicke analize, poslovne i ekonomske vesti, vesti iz regiona, intervjue, informacije iz kulture, reportaze, pokriva sve sportske dogadjaje, detaljan tv program, nagradne igre, zabavu, fenomenalni Blic strip, dnevni horoskop, arhivu svih dogadjaja'
|
||||
publisher = 'RINGIER d.o.o.'
|
||||
category = 'news, politics, Serbia'
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 100
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
cover_url = 'http://www.blic.rs/resources/images/header_back_tile.png'
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
, '--base-font-size', '10'
|
||||
, '--category', 'news, Serbia'
|
||||
, '--publisher', 'Blic'
|
||||
, '--category', category
|
||||
, '--publisher', publisher
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
|
||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'class':'single_news'})]
|
||||
|
||||
feeds = [(u'Vesti', u'http://www.blic.rs/rssall.php')]
|
||||
|
||||
remove_tags = [dict(name=['object','link'])]
|
||||
|
||||
def print_version(self, url):
|
||||
start_url, question, rest_url = url.partition('?')
|
||||
return u'http://www.blic.rs/_print.php?' + rest_url
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
mtag = '<meta http-equiv="Content-Language" content="sr-Latn"/>'
|
||||
soup.head.insert(0,mtag)
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
return soup
|
||||
|
||||
language = _('Serbian')
|
@ -1,32 +1,36 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
clarin.com
|
||||
'''
|
||||
|
||||
from calibre import strftime
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Clarin(BasicNewsRecipe):
|
||||
title = 'Clarin'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'Noticias de Argentina y mundo'
|
||||
publisher = 'Grupo Clarin'
|
||||
category = 'news, politics, Argentina'
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 100
|
||||
language = _('Spanish')
|
||||
use_embedded_content = False
|
||||
no_stylesheets = True
|
||||
cover_url = strftime('http://www.clarin.com/diario/%Y/%m/%d/portada.jpg')
|
||||
remove_javascript = True
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
, '--base-font-size', '10'
|
||||
, '--category', 'news, Argentina'
|
||||
, '--publisher', 'Grupo Clarin'
|
||||
, '--category', category
|
||||
, '--publisher', publisher
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
|
||||
remove_tags = [
|
||||
dict(name='a' , attrs={'class':'Imp' })
|
||||
,dict(name='div' , attrs={'class':'Perma' })
|
||||
@ -49,3 +53,12 @@ class Clarin(BasicNewsRecipe):
|
||||
rest = artl.partition('-0')[-1]
|
||||
lmain = rest.partition('.')[0]
|
||||
return 'http://www.servicios.clarin.com/notas/jsp/clarin/v9/notas/imprimir.jsp?pagid=' + lmain
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
mtag = '<meta http-equiv="Content-Language" content="es-AR"/>'
|
||||
soup.head.insert(0,mtag)
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
return soup
|
||||
|
||||
language = _('Spanish')
|
@ -5,37 +5,47 @@ __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
danas.rs
|
||||
'''
|
||||
import string,re
|
||||
import re
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Danas(BasicNewsRecipe):
|
||||
title = 'Danas'
|
||||
title = u'Danas'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'Dnevne novine sa vestima iz sveta, politike, ekonomije, kulture, sporta, Beograda, Novog Sada i cele Srbije.'
|
||||
description = 'Vesti'
|
||||
publisher = 'Danas d.o.o.'
|
||||
category = 'news, politics, Serbia'
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
no_stylesheets = False
|
||||
remove_javascript = True
|
||||
use_embedded_content = False
|
||||
cover_url = 'http://www.danas.rs/images/basic/danas.gif'
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
, '--base-font-size', '10'
|
||||
, '--category', 'news, Serbia'
|
||||
, '--publisher', 'Danas'
|
||||
, '--category', category
|
||||
, '--publisher', publisher
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
|
||||
|
||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'id':'left'})]
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class':'width_1_4' })
|
||||
,dict(name='div', attrs={'class':'metaClanka' })
|
||||
dict(name='div', attrs={'class':['width_1_4','metaClanka','baner']})
|
||||
,dict(name='div', attrs={'id':'comments'})
|
||||
,dict(name='div', attrs={'class':'baner' })
|
||||
,dict(name='div', attrs={'class':'slikaClanka'})
|
||||
,dict(name=['object','link'])
|
||||
]
|
||||
|
||||
feeds = [ (u'Vesti', u'http://www.danas.rs/rss/rss.asp')]
|
||||
|
||||
def print_version(self, url):
|
||||
return url + '&action=print'
|
||||
def preprocess_html(self, soup):
|
||||
mtag = '<meta http-equiv="Content-Language" content="sr-Latn"/>'
|
||||
soup.head.insert(0,mtag)
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
return soup
|
||||
language = _('Serbian')
|
@ -9,7 +9,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class DeStandaard(BasicNewsRecipe):
|
||||
title = u'De Standaard'
|
||||
__author__ = u'Darko Miletic'
|
||||
language = _('French')
|
||||
language = _('Dutch')
|
||||
description = u'News from Belgium'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
|
@ -13,6 +13,7 @@ class DeMorganBe(BasicNewsRecipe):
|
||||
__author__ = u'Darko Miletic'
|
||||
description = u'News from Belgium'
|
||||
oldest_article = 7
|
||||
language = _('Dutch')
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
|
41
src/calibre/web/feeds/recipes/recipe_dna.py
Normal file
41
src/calibre/web/feeds/recipes/recipe_dna.py
Normal file
@ -0,0 +1,41 @@
|
||||
'''
|
||||
dnaindia.com
|
||||
'''
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class DNAIndia(BasicNewsRecipe):
|
||||
|
||||
title = 'DNA India'
|
||||
description = 'Mumbai news, India news, World news, breaking news'
|
||||
__author__ = 'Kovid Goyal'
|
||||
language = _('English')
|
||||
|
||||
feeds = [
|
||||
('Top News', 'http://www.dnaindia.com/syndication/rss_topnews.xml'),
|
||||
('Popular News', 'http://www.dnaindia.com/syndication/rss_popular.xml'),
|
||||
('Recent Columns', 'http://www.dnaindia.com/syndication/rss_column.xml'),
|
||||
('Mumbai', 'http://www.dnaindia.com/syndication/rss,catid-1.xml'),
|
||||
('India', 'http://www.dnaindia.com/syndication/rss,catid-2.xml'),
|
||||
('World', 'http://www.dnaindia.com/syndication/rss,catid-9.xml'),
|
||||
('Money', 'http://www.dnaindia.com/syndication/rss,catid-4.xml'),
|
||||
('Sports', 'http://www.dnaindia.com/syndication/rss,catid-6.xml'),
|
||||
('After Hours', 'http://www.dnaindia.com/syndication/rss,catid-7.xml'),
|
||||
('Digital Life', 'http://www.dnaindia.com/syndication/rss,catid-1089741.xml'),
|
||||
]
|
||||
remove_tags = [{'id':'footer'}, {'class':['bottom', 'categoryHead']}]
|
||||
|
||||
def print_version(self, url):
|
||||
match = re.search(r'newsid=(\d+)', url)
|
||||
if not match:
|
||||
return url
|
||||
return 'http://www.dnaindia.com/dnaprint.asp?newsid='+match.group(1)
|
||||
|
||||
def postprocess_html(self, soup, first_fetch):
|
||||
for t in soup.findAll(['table', 'tr', 'td']):
|
||||
t.name = 'div'
|
||||
|
||||
a = soup.find(href='http://www.3dsyndication.com/')
|
||||
if a is not None:
|
||||
a.parent.extract()
|
||||
return soup
|
@ -5,32 +5,37 @@ __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
emol.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class ElMercurio(BasicNewsRecipe):
|
||||
title = 'El Mercurio online'
|
||||
language = _('Spanish')
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'El sitio de noticias online de Chile'
|
||||
publisher = 'El Mercurio'
|
||||
category = 'news, politics, Chile'
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
encoding = 'cp1252'
|
||||
cover_url = 'http://www.emol.com/especiales/logo_emol/logo_emol.gif'
|
||||
remove_javascript = True
|
||||
use_embedded_content = False
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
, '--category' , 'news, Chile'
|
||||
, '--publisher' , title
|
||||
, '--category', category
|
||||
, '--publisher', publisher
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'class':'despliegue-txt_750px'})
|
||||
,dict(name='div', attrs={'id':'div_cuerpo_participa'})
|
||||
]
|
||||
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class':'contenedor_despliegue-col-left300'})
|
||||
,dict(name='div', attrs={'id':['div_centro_dn_opc','div_cabezera','div_secciones','div_contenidos','div_pie','nav']})
|
||||
@ -46,3 +51,11 @@ class ElMercurio(BasicNewsRecipe):
|
||||
,(u'La Musica', u'http://www.emol.com/rss20/rss.asp?canal=7')
|
||||
]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
mtag = '<meta http-equiv="Content-Language" content="es-CL"/>'
|
||||
soup.head.insert(0,mtag)
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
return soup
|
||||
|
||||
language = _('Spanish')
|
@ -1,7 +1,7 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
elargentino.com
|
||||
'''
|
||||
@ -12,9 +12,11 @@ class ElArgentino(BasicNewsRecipe):
|
||||
title = 'ElArgentino.com'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'Informacion Libre las 24 horas'
|
||||
language = _('Spanish')
|
||||
publisher = 'ElArgentino.com'
|
||||
category = 'news, politics, Argentina'
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 100
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
encoding = 'utf8'
|
||||
@ -22,10 +24,12 @@ class ElArgentino(BasicNewsRecipe):
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
, '--category' , 'news, Argentina'
|
||||
, '--publisher' , 'ElArgentino.com'
|
||||
, '--category', category
|
||||
, '--publisher', publisher
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'id':'noprint' })
|
||||
,dict(name='div', attrs={'class':'encabezadoImprimir'})
|
||||
@ -50,7 +54,10 @@ class ElArgentino(BasicNewsRecipe):
|
||||
return u'http://www.elargentino.com/Impresion.aspx?Id=' + article_id
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
|
||||
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">\n<meta http-equiv="Content-Language" content="es-AR"/>\n'
|
||||
soup.head.insert(0,mtag)
|
||||
soup.prettify()
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
return soup
|
||||
|
||||
language = _('Spanish')
|
@ -12,35 +12,49 @@ class ElMundo(BasicNewsRecipe):
|
||||
title = 'El Mundo'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'News from Spain'
|
||||
language = _('Spanish')
|
||||
publisher = 'El Mundo'
|
||||
category = 'news, politics, Spain'
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
encoding = 'iso8859_15'
|
||||
cover_url = 'http://estaticos02.cache.el-mundo.net/papel/imagenes/v2.0/logoverde.gif'
|
||||
remove_javascript = True
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
, '--category' , 'news, Spain'
|
||||
, '--publisher' , title
|
||||
, '--category', category
|
||||
, '--publisher', publisher
|
||||
]
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'class':'noticia'})]
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'id':['bloqueprincipal','noticia']})
|
||||
,dict(name='div', attrs={'class':['contenido_noticia_01']})
|
||||
]
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class':['herramientas','publicidad_google','video','herramientasarriba','contenido_noticia_02']})
|
||||
dict(name='div', attrs={'class':['herramientas','publicidad_google']})
|
||||
,dict(name='div', attrs={'id':'modulo_multimedia' })
|
||||
,dict(name=['object','script','link', 'a'])
|
||||
,dict(name='ul', attrs={'class':'herramientas' })
|
||||
,dict(name=['object','link'])
|
||||
]
|
||||
|
||||
feeds = [
|
||||
(u'Portada' , u'http://rss.elmundo.es/rss/descarga.htm?data2=4' )
|
||||
,(u'Television' , u'http://rss.elmundo.es/rss/descarga.htm?data2=76')
|
||||
,(u'Espana' , u'http://rss.elmundo.es/rss/descarga.htm?data2=8' )
|
||||
,(u'Internacional' , u'http://rss.elmundo.es/rss/descarga.htm?data2=9' )
|
||||
,(u'Cultura' , u'http://rss.elmundo.es/rss/descarga.htm?data2=6' )
|
||||
,(u'Ciencia/Ecologia', u'http://rss.elmundo.es/rss/descarga.htm?data2=5' )
|
||||
,(u'Comunicacion' , u'http://rss.elmundo.es/rss/descarga.htm?data2=26')
|
||||
,(u'Television' , u'http://rss.elmundo.es/rss/descarga.htm?data2=76')
|
||||
]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
return soup
|
||||
|
||||
language = _('Spanish')
|
@ -8,25 +8,28 @@ estadao.com.br
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class Estadao(BasicNewsRecipe):
|
||||
title = 'O Estado de S. Paulo'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'News from Brasil'
|
||||
language = _('Spanish')
|
||||
description = 'News from Brasil in Portugese'
|
||||
publisher = 'O Estado de S. Paulo'
|
||||
category = 'news, politics, Brasil'
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
encoding = 'utf8'
|
||||
cover_url = 'http://www.estadao.com.br/img/logo_estadao.png'
|
||||
remove_javascript = True
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
, '--category' , 'news, Brasil'
|
||||
, '--publisher' , title
|
||||
, '--category', category
|
||||
, '--publisher', publisher
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'id':'c1'})]
|
||||
|
||||
remove_tags = [
|
||||
@ -52,4 +55,8 @@ class Estadao(BasicNewsRecipe):
|
||||
ifr = soup.find('iframe')
|
||||
if ifr:
|
||||
ifr.extract()
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
return soup
|
||||
|
||||
language = _('Portugese')
|
@ -7,37 +7,46 @@ granma.cubaweb.cu
|
||||
'''
|
||||
import urllib
|
||||
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Granma(BasicNewsRecipe):
|
||||
title = 'Diario Granma'
|
||||
__author__ = 'Darko Miletic'
|
||||
language = _('Spanish')
|
||||
description = 'Organo oficial del Comite Central del Partido Comunista de Cuba'
|
||||
publisher = 'Granma'
|
||||
category = 'news, politics, Cuba'
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
encoding = 'cp1252'
|
||||
cover_url = 'http://www.granma.cubaweb.cu/imagenes/granweb229d.jpg'
|
||||
remove_javascript = True
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
, '--category' , 'news, Cuba'
|
||||
, '--publisher' , title
|
||||
, '--category', category
|
||||
, '--publisher', publisher
|
||||
, '--ignore-tables'
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
|
||||
keep_only_tags = [dict(name='table', attrs={'height':'466'})]
|
||||
|
||||
feeds = [(u'Noticias', u'http://www.granma.cubaweb.cu/noticias.xml' )]
|
||||
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
del soup.body.table['style']
|
||||
rtag = soup.find('td', attrs={'height':'458'})
|
||||
if rtag:
|
||||
del rtag['style']
|
||||
mtag = '<meta http-equiv="Content-Language" content="es-CU"/>'
|
||||
soup.head.insert(0,mtag)
|
||||
for item in soup.findAll('table'):
|
||||
if item.has_key('width'):
|
||||
del item['width']
|
||||
if item.has_key('height'):
|
||||
del item['height']
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
return soup
|
||||
|
||||
language = _('Spanish')
|
@ -1,7 +1,7 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
harpers.org - paid subscription/ printed issue articles
|
||||
This recipe only get's article's published in text format
|
||||
@ -9,13 +9,15 @@ images and pdf's are ignored
|
||||
'''
|
||||
|
||||
from calibre import strftime
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Harpers_full(BasicNewsRecipe):
|
||||
title = u"Harper's Magazine - articles from printed edition"
|
||||
__author__ = u'Darko Miletic'
|
||||
description = u"Harper's Magazine: Founded June 1850."
|
||||
language = _('English')
|
||||
publisher = "Harpers's"
|
||||
category = 'news, politics, USA'
|
||||
oldest_article = 30
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
@ -26,6 +28,15 @@ class Harpers_full(BasicNewsRecipe):
|
||||
INDEX = strftime('http://www.harpers.org/archive/%Y/%m')
|
||||
LOGIN = 'http://www.harpers.org'
|
||||
cover_url = strftime('http://www.harpers.org/media/pages/%Y/%m/gif/0001.gif')
|
||||
remove_javascript = True
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
, '--category', category
|
||||
, '--publisher', publisher
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
|
||||
keep_only_tags = [ dict(name='div', attrs={'id':'cached'}) ]
|
||||
remove_tags = [
|
||||
@ -60,3 +71,10 @@ class Harpers_full(BasicNewsRecipe):
|
||||
,'description':''
|
||||
})
|
||||
return [(soup.head.title.string, articles)]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
return soup
|
||||
|
||||
language = _('English')
|
59
src/calibre/web/feeds/recipes/recipe_honoluluadvertiser.py
Normal file
59
src/calibre/web/feeds/recipes/recipe_honoluluadvertiser.py
Normal file
@ -0,0 +1,59 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
honoluluadvertiser.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Honoluluadvertiser(BasicNewsRecipe):
|
||||
title = 'Honolulu Advertiser'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = "Latest national and local Hawaii sports news from The Honolulu Advertiser."
|
||||
publisher = 'Honolulu Advertiser'
|
||||
category = 'news, Honolulu, Hawaii'
|
||||
oldest_article = 2
|
||||
language = _('English')
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
encoding = 'cp1252'
|
||||
remove_javascript = True
|
||||
cover_url = 'http://www.honoluluadvertiser.com/graphics/branding.gif'
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment' , description
|
||||
, '--category' , category
|
||||
, '--publisher' , publisher
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
|
||||
keep_only_tags = [dict(name='td')]
|
||||
|
||||
remove_tags = [dict(name=['object','link'])]
|
||||
|
||||
|
||||
feeds = [
|
||||
(u'Breaking news', u'http://www.honoluluadvertiser.com/apps/pbcs.dll/section?Category=RSS01&MIME=XML' )
|
||||
,(u'Local news', u'http://www.honoluluadvertiser.com/apps/pbcs.dll/section?Category=RSS02&MIME=XML' )
|
||||
,(u'Sports', u'http://www.honoluluadvertiser.com/apps/pbcs.dll/section?Category=RSS03&MIME=XML' )
|
||||
,(u'Island life', u'http://www.honoluluadvertiser.com/apps/pbcs.dll/section?Category=RSS05&MIME=XML' )
|
||||
,(u'Entertainment', u'http://www.honoluluadvertiser.com/apps/pbcs.dll/section?Category=RSS06&MIME=XML' )
|
||||
,(u'Business', u'http://www.honoluluadvertiser.com/apps/pbcs.dll/section?Category=RSS04&MIME=XML' )
|
||||
]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
mtag = '\n<meta http-equiv="Content-Language" content="en"/>\n'
|
||||
soup.head.insert(0,mtag)
|
||||
return soup
|
||||
|
||||
def print_version(self, url):
|
||||
ubody, sep, rest = url.rpartition('/-1/')
|
||||
root, sep2, article_id = ubody.partition('/article/')
|
||||
return u'http://www.honoluluadvertiser.com/apps/pbcs.dll/article?AID=/' + article_id + '&template=printart'
|
||||
|
15
src/calibre/web/feeds/recipes/recipe_indy_star.py
Normal file
15
src/calibre/web/feeds/recipes/recipe_indy_star.py
Normal file
@ -0,0 +1,15 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipe1234144423(BasicNewsRecipe):
|
||||
title = u'Indianapolis Star'
|
||||
oldest_article = 5
|
||||
language = _('English')
|
||||
__author__ = 'Owen Kelly'
|
||||
max_articles_per_feed = 100
|
||||
|
||||
cover_url = u'http://www2.indystar.com/frontpage/images/today.jpg'
|
||||
|
||||
feeds = [(u'Community Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=LOCAL&template=rss&mime=XML'), (u'News Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=NEWS&template=rss&mime=XML'), (u'Business Headlines', u'http://www..indystar.com/apps/pbcs.dll/section?Category=BUSINESS&template=rss&mime=XML'), (u'Sports Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=SPORTS&template=rss&mime=XML'), (u'Lifestyle Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=LIVING&template=rss&mime=XML'), (u'Opinion Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=OPINION&template=rss&mime=XML')]
|
||||
|
||||
def print_version(self, url):
|
||||
return url + '&template=printart'
|
@ -1,7 +1,7 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
infobae.com
|
||||
'''
|
||||
@ -12,21 +12,23 @@ class Infobae(BasicNewsRecipe):
|
||||
title = 'Infobae.com'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'Informacion Libre las 24 horas'
|
||||
publisher = 'Infobae.com'
|
||||
category = 'news, politics, Argentina'
|
||||
oldest_article = 2
|
||||
language = _('Spanish')
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
encoding = 'iso-8859-1'
|
||||
cover_url = 'http://www.infobae.com/imgs/header/header.gif'
|
||||
remove_javascript = True
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
, '--category' , 'news, Argentina'
|
||||
, '--publisher' , 'Infobae.com'
|
||||
, '--category', category
|
||||
, '--publisher', publisher
|
||||
]
|
||||
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
|
||||
feeds = [
|
||||
(u'Noticias' , u'http://www.infobae.com/adjuntos/html/RSS/hoy.xml' )
|
||||
@ -39,3 +41,12 @@ class Infobae(BasicNewsRecipe):
|
||||
main, sep, article_part = url.partition('contenidos/')
|
||||
article_id, rsep, rrest = article_part.partition('-')
|
||||
return u'http://www.infobae.com/notas/nota_imprimir.php?Idx=' + article_id
|
||||
|
||||
def preprocess_html(self, soup):
    '''
    Prepare a downloaded page for conversion.

    Inserts Content-Type (charset=utf-8) and Content-Language (es-AR) meta
    markup as the first child of ``soup.head``, removes every ``style``
    attribute in the document, and returns the same ``soup``.
    '''
    head_markup = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">\n<meta http-equiv="Content-Language" content="es-AR"/>\n'
    soup.head.insert(0, head_markup)
    for styled_tag in soup.findAll(style=True):
        del styled_tag['style']
    return soup
|
||||
|
||||
language = _('Spanish')
|
@ -12,20 +12,24 @@ class JBOnline(BasicNewsRecipe):
|
||||
title = 'Jornal Brasileiro Online'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'News from Brasil'
|
||||
publisher = 'Jornal Brasileiro'
|
||||
category = 'news, politics, Brasil'
|
||||
oldest_article = 2
|
||||
language = _('Spanish')
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
encoding = 'cp1252'
|
||||
cover_url = 'http://jbonline.terra.com.br/img/logo_01.gif'
|
||||
remove_javascript = True
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
, '--category' , 'news, Brasil'
|
||||
, '--publisher' , title
|
||||
, '--category', category
|
||||
, '--publisher', publisher
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'id':'corpoNoticia'})]
|
||||
|
||||
remove_tags = [dict(name=['script','object','form'])]
|
||||
@ -36,7 +40,8 @@ class JBOnline(BasicNewsRecipe):
|
||||
ifr = soup.find('iframe')
|
||||
if ifr:
|
||||
ifr.extract()
|
||||
item = soup.find('div', attrs={'id':'corpoNoticia'})
|
||||
if item:
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
return soup
|
||||
|
||||
language = _('Portugese')
|
@ -6,28 +6,36 @@ __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
||||
jutarnji.hr
|
||||
'''
|
||||
|
||||
import string, re
|
||||
import re
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||
|
||||
class Jutarnji(BasicNewsRecipe):
|
||||
title = 'Jutarnji'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'Online izdanje Jutarnjeg lista'
|
||||
title = u'Jutarnji'
|
||||
__author__ = u'Darko Miletic'
|
||||
description = u'Hrvatski portal'
|
||||
publisher = 'Jutarnji.hr'
|
||||
category = 'news, politics, Croatia'
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 100
|
||||
simultaneous_downloads = 1
|
||||
delay = 1
|
||||
language = _('Croatian')
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
remove_javascript = True
|
||||
encoding = 'cp1250'
|
||||
cover_url = 'http://www.jutarnji.hr/EPHResources/Images/2008/06/05/jhrlogo.png'
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
, '--base-font-size', '10'
|
||||
, '--category', 'news, Croatia'
|
||||
, '--publisher', 'Europapress holding d.o.o.'
|
||||
, '--category', category
|
||||
, '--publisher', publisher
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
|
||||
|
||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||
|
||||
remove_tags = [
|
||||
@ -49,11 +57,16 @@ class Jutarnji(BasicNewsRecipe):
|
||||
def print_version(self, url):
    '''
    Build the print-view URL for a jutarnji.hr article.

    The article id is the text between the last comma and the first
    ``.jl`` in ``url``. When ``url`` contains no comma, everything before
    ``.jl`` is used as the id.
    '''
    before_extension = url.partition('.jl')[0]
    article_id = before_extension.rpartition(',')[2]
    return u'http://www.jutarnji.hr/ispis_clanka.jl?artid=' + article_id
|
||||
|
||||
def preprocess_html(self, soup):
    '''
    Prepare a downloaded page for conversion.

    Inserts a Content-Type (charset=utf-8) meta tag and then a
    Content-Language (hr) meta tag at the start of ``soup.head``. Both go
    in at index 0, so the language tag ends up first. Every ``style`` and
    ``width`` attribute is then removed and the same ``soup`` is returned.
    '''
    content_type_meta = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
    soup.head.insert(0, content_type_meta)
    # A soup.prettify() call used to sit here; its return value was thrown
    # away, so it only cost a full serialisation of the document.
    language_meta = '<meta http-equiv="Content-Language" content="hr"/>'
    soup.head.insert(0, language_meta)
    for styled_tag in soup.findAll(style=True):
        del styled_tag['style']
    for sized_tag in soup.findAll(width=True):
        del sized_tag['width']
    return soup
|
||||
|
@ -13,21 +13,25 @@ class Juventudrebelde(BasicNewsRecipe):
|
||||
title = 'Juventud Rebelde'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'Diario de la Juventud Cubana'
|
||||
publisher = 'Juventud rebelde'
|
||||
category = 'news, politics, Cuba'
|
||||
oldest_article = 2
|
||||
language = _('Spanish')
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
encoding = 'cp1252'
|
||||
cover_url = strftime('http://www.juventudrebelde.cu/UserFiles/File/impreso/iportada-%Y-%m-%d.jpg')
|
||||
remove_javascript = True
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
, '--category' , 'news, Cuba'
|
||||
, '--publisher' , title
|
||||
, '--category', category
|
||||
, '--publisher', publisher
|
||||
, '--ignore-tables'
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'id':'noticia'})]
|
||||
|
||||
feeds = [
|
||||
@ -40,4 +44,11 @@ class Juventudrebelde(BasicNewsRecipe):
|
||||
,(u'Lectura', u'http://www.juventudrebelde.cu/rss/generales.php?seccion=lectura' )
|
||||
]
|
||||
|
||||
def preprocess_html(self, soup):
    '''
    Prepare a downloaded page for conversion.

    Puts a Content-Language (es-CU) meta tag at the front of ``soup.head``
    and strips the ``style`` attribute from every tag that carries one.
    Returns the same ``soup`` object.
    '''
    language_meta = '<meta http-equiv="Content-Language" content="es-CU"/>'
    soup.head.insert(0, language_meta)
    for tag in soup.findAll(style=True):
        del tag['style']
    return soup
|
||||
|
||||
language = _('Spanish')
|
@ -5,7 +5,6 @@ __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
juventudrebelde.co.cu
|
||||
'''
|
||||
from calibre import strftime
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
@ -13,22 +12,33 @@ class Juventudrebelde_english(BasicNewsRecipe):
|
||||
title = 'Juventud Rebelde in english'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'The newspaper of Cuban Youth'
|
||||
language = _('English')
|
||||
publisher = 'Juventud Rebelde'
|
||||
category = 'news, politics, Cuba'
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
encoding = 'iso-8859-1'
|
||||
remove_javascript = True
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
, '--category' , 'news, Cuba'
|
||||
, '--publisher' , title
|
||||
, '--category', category
|
||||
, '--publisher', publisher
|
||||
, '--ignore-tables'
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'class':'read'})]
|
||||
|
||||
feeds = [(u'All news', u'http://www.juventudrebelde.cip.cu/rss/all/' )]
|
||||
|
||||
def preprocess_html(self, soup):
    '''
    Prepare a downloaded page for conversion.

    Inserts a Content-Language meta tag at the start of ``soup.head``,
    removes every ``style`` attribute, and returns the same ``soup``.
    '''
    # This recipe declares its language as English; the tag previously said
    # "es-CU", apparently copied from the Spanish edition's recipe.
    language_meta = '<meta http-equiv="Content-Language" content="en"/>'
    soup.head.insert(0, language_meta)
    for styled_tag in soup.findAll(style=True):
        del styled_tag['style']
    return soup
|
||||
|
||||
language = _('English')
|
@ -11,25 +11,28 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class LaCuarta(BasicNewsRecipe):
|
||||
title = 'La Cuarta'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'El sitio de noticias online de Chile'
|
||||
description = 'La Cuarta Cibernetica: El Diario popular'
|
||||
publisher = 'CODISA, Consorcio Digital S.A.'
|
||||
category = 'news, politics, entertainment, Chile'
|
||||
oldest_article = 2
|
||||
language = _('Spanish')
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
encoding = 'cp1252'
|
||||
remove_javascript = True
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
, '--category' , 'news, Chile'
|
||||
, '--publisher' , title
|
||||
, '--category', category
|
||||
, '--publisher', publisher
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'class':'articulo desplegado'}) ]
|
||||
|
||||
remove_tags = [
|
||||
dict(name='script')
|
||||
,dict(name='ul')
|
||||
dict(name='ul')
|
||||
,dict(name='div', attrs={'id':['toolbox','articleImageDisplayer','enviarAmigo']})
|
||||
,dict(name='div', attrs={'class':['par ad-1','par ad-2']})
|
||||
,dict(name='input')
|
||||
@ -37,7 +40,14 @@ class LaCuarta(BasicNewsRecipe):
|
||||
,dict(name='strong', text='PUBLICIDAD')
|
||||
]
|
||||
|
||||
def preprocess_html(self, soup):
    '''
    Prepare a downloaded page for conversion.

    Adds a Content-Language (es-CL) meta tag as the first child of
    ``soup.head``, deletes all inline ``style`` attributes, and hands the
    same ``soup`` back.
    '''
    content_language = '<meta http-equiv="Content-Language" content="es-CL"/>'
    soup.head.insert(0, content_language)
    for element in soup.findAll(style=True):
        del element['style']
    return soup
|
||||
|
||||
feeds = [(u'Noticias', u'http://lacuarta.cl/app/rss?sc=TEFDVUFSVEE=')]
|
||||
|
||||
|
||||
language = _('Spanish')
|
@ -12,21 +12,24 @@ class LaSegunda(BasicNewsRecipe):
|
||||
title = 'La Segunda'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'El sitio de noticias online de Chile'
|
||||
language = _('Spanish')
|
||||
publisher = 'La Segunda'
|
||||
category = 'news, politics, Chile'
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
encoding = 'cp1252'
|
||||
cover_url = 'http://www.lasegunda.com/imagenes/logotipo_lasegunda_Oli.gif'
|
||||
remove_javascript = True
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
, '--category' , 'news, Chile'
|
||||
, '--publisher' , title
|
||||
, '--ignore-tables'
|
||||
, '--category', category
|
||||
, '--publisher', publisher
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
|
||||
keep_only_tags = [dict(name='table')]
|
||||
|
||||
feeds = [
|
||||
@ -46,3 +49,13 @@ class LaSegunda(BasicNewsRecipe):
|
||||
rest, sep, article_id = url.partition('index.asp?idnoticia=')
|
||||
return u'http://www.lasegunda.com/edicionOnline/include/secciones/_detalle_impresion.asp?idnoticia=' + article_id
|
||||
|
||||
def preprocess_html(self, soup):
    '''
    Prepare a downloaded page for conversion.

    Inserts a Content-Language (es-CL) meta tag at the start of
    ``soup.head``, removes the ``width`` attribute from every ``table``
    that has one, removes every ``style`` attribute, and returns the same
    ``soup``.
    '''
    language_meta = '<meta http-equiv="Content-Language" content="es-CL"/>'
    soup.head.insert(0, language_meta)
    for table in soup.findAll(name='table', width=True):
        del table['width']
    for styled_tag in soup.findAll(style=True):
        del styled_tag['style']
    return soup
|
||||
|
||||
language = _('Spanish')
|
@ -12,20 +12,24 @@ class LaTercera(BasicNewsRecipe):
|
||||
title = 'La Tercera'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'El sitio de noticias online de Chile'
|
||||
publisher = 'La Tercera'
|
||||
category = 'news, politics, Chile'
|
||||
oldest_article = 2
|
||||
language = _('Spanish')
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
encoding = 'cp1252'
|
||||
remove_javascript = True
|
||||
use_embedded_content = False
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
, '--category' , 'news, Chile'
|
||||
, '--publisher' , title
|
||||
, '--category', category
|
||||
, '--publisher', publisher
|
||||
]
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'class':'span-16 articulo border'}) ]
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'class':['span-16 articulo border','span-16 border','span-16']}) ]
|
||||
|
||||
remove_tags = [
|
||||
dict(name='script')
|
||||
@ -50,4 +54,11 @@ class LaTercera(BasicNewsRecipe):
|
||||
,(u'Educacion', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=657')
|
||||
]
|
||||
|
||||
def preprocess_html(self, soup):
    '''
    Prepare a downloaded page for conversion.

    Declares the content language (es-CL) through a meta tag placed first
    in ``soup.head``, then clears inline ``style`` attributes throughout
    the document. The same ``soup`` is returned.
    '''
    lang_markup = '<meta http-equiv="Content-Language" content="es-CL"/>'
    soup.head.insert(0, lang_markup)
    for node in soup.findAll(style=True):
        del node['style']
    return soup
|
||||
|
||||
language = _('Spanish')
|
@ -1,7 +1,7 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
lanacion.com.ar
|
||||
'''
|
||||
@ -11,20 +11,23 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class Lanacion(BasicNewsRecipe):
|
||||
title = 'La Nacion'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'Informacion actualizada las 24 horas, con noticias de Argentina y del mundo - Informate ya!'
|
||||
description = 'Noticias de Argentina y el resto del mundo'
|
||||
publisher = 'La Nacion'
|
||||
category = 'news, politics, Argentina'
|
||||
oldest_article = 2
|
||||
language = _('Spanish')
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
, '--base-font-size', '10'
|
||||
, '--category', 'news, Argentina'
|
||||
, '--publisher', 'La Nacion SA'
|
||||
, '--category', category
|
||||
, '--publisher', publisher
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'class':'nota floatFix'})]
|
||||
remove_tags = [
|
||||
dict(name='div' , attrs={'class':'notaComentario floatFix noprint' })
|
||||
@ -47,11 +50,11 @@ class Lanacion(BasicNewsRecipe):
|
||||
,(u'Revista' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=494' )
|
||||
]
|
||||
|
||||
def get_cover_url(self):
    '''
    Find the cover image on the La Nacion front page.

    Fetches the front page with ``self.index_to_soup`` and looks for the
    first ``img`` tag whose class is ``logo``. Returns the site address
    joined with that tag's ``src``, or ``None`` when no such tag is found.
    '''
    site = 'http://www.lanacion.com.ar'
    logo = self.index_to_soup(site).find('img', attrs={'class': 'logo'})
    if not logo:
        return None
    return site + logo['src']
|
||||
def preprocess_html(self, soup):
    '''
    Prepare a downloaded page for conversion.

    Places a Content-Language (es-AR) meta tag at the start of
    ``soup.head`` and drops every ``style`` attribute. Returns the same
    ``soup``.
    '''
    language_meta = '<meta http-equiv="Content-Language" content="es-AR"/>'
    soup.head.insert(0, language_meta)
    for styled in soup.findAll(style=True):
        del styled['style']
    return soup
|
||||
|
||||
language = _('Spanish')
|
@ -13,20 +13,24 @@ class LaNacionChile(BasicNewsRecipe):
|
||||
title = 'La Nacion Chile'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'El sitio de noticias online de Chile'
|
||||
publisher = 'La Nacion'
|
||||
category = 'news, politics, Chile'
|
||||
oldest_article = 2
|
||||
language = _('Spanish')
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
encoding = 'cp1252'
|
||||
cover_url = 'http://www.lanacion.cl/prontus_noticias_v2/imag/site/logo.gif'
|
||||
remove_javascript = True
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
, '--category' , 'news, Chile'
|
||||
, '--publisher' , title
|
||||
, '--category', category
|
||||
, '--publisher', publisher
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'class':'bloque'})]
|
||||
|
||||
feeds = [(u'Noticias', u'http://www.lanacion.cl/rss.xml')]
|
||||
@ -41,5 +45,10 @@ class LaNacionChile(BasicNewsRecipe):
|
||||
item = soup.find('a', attrs={'href':'javascript:window.close()'})
|
||||
if item:
|
||||
item.extract()
|
||||
mtag = '<meta http-equiv="Content-Language" content="es-CL"/>'
|
||||
soup.head.insert(0,mtag)
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
return soup
|
||||
|
||||
language = _('Spanish')
|
@ -1,7 +1,7 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
laprensa.com.ar
|
||||
'''
|
||||
@ -13,20 +13,24 @@ class LaPrensa(BasicNewsRecipe):
|
||||
title = 'La Prensa'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'Informacion Libre las 24 horas'
|
||||
publisher = 'La Prensa'
|
||||
category = 'news, politics, Argentina'
|
||||
oldest_article = 7
|
||||
language = _('Spanish')
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
encoding = 'cp1252'
|
||||
cover_url = 'http://www.laprensa.com.ar/imgs/logo.gif'
|
||||
remove_javascript = True
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
, '--category' , 'news, Argentina'
|
||||
, '--publisher' , title
|
||||
, '--category', category
|
||||
, '--publisher', publisher
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
|
||||
feeds = [
|
||||
(u'Politica' , u'http://www.laprensa.com.ar/Rss.aspx?Rss=4' )
|
||||
,(u'Economia' , u'http://www.laprensa.com.ar/Rss.aspx?Rss=5' )
|
||||
@ -47,5 +51,10 @@ class LaPrensa(BasicNewsRecipe):
|
||||
|
||||
def preprocess_html(self, soup):
    '''
    Prepare a downloaded page for conversion.

    Removes the ``onload`` attribute from ``soup.body``, inserts a
    Content-Language (es-AR) meta tag at the start of ``soup.head``,
    strips every ``style`` attribute, and returns the same ``soup``.
    '''
    body = soup.body
    del body['onload']
    language_meta = '<meta http-equiv="Content-Language" content="es-AR"/>'
    soup.head.insert(0, language_meta)
    for styled_tag in soup.findAll(style=True):
        del styled_tag['style']
    return soup
|
||||
|
||||
language = _('Spanish')
|
@ -7,12 +7,15 @@ nin.co.yu
|
||||
'''
|
||||
|
||||
import re, urllib
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Nin(BasicNewsRecipe):
|
||||
title = 'NIN online'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'Nedeljne informativne novine'
|
||||
publisher = 'NIN'
|
||||
category = 'news, politics, Serbia'
|
||||
no_stylesheets = True
|
||||
oldest_article = 15
|
||||
simultaneous_downloads = 1
|
||||
@ -22,12 +25,18 @@ class Nin(BasicNewsRecipe):
|
||||
PREFIX = 'http://www.nin.co.yu'
|
||||
INDEX = PREFIX + '/?change_lang=ls'
|
||||
LOGIN = PREFIX + '/?logout=true'
|
||||
remove_javascript = True
|
||||
use_embedded_content = False
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
, '--category' , 'news, politics, Serbia'
|
||||
, '--publisher' , 'NIN'
|
||||
, '--category', category
|
||||
, '--publisher', publisher
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
|
||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||
|
||||
def get_browser(self):
|
||||
@ -53,3 +62,12 @@ class Nin(BasicNewsRecipe):
|
||||
if link_item:
|
||||
cover_url = self.PREFIX + link_item['src']
|
||||
return cover_url
|
||||
|
||||
def preprocess_html(self, soup):
    '''
    Prepare a downloaded page for conversion.

    Inserts a Content-Language (sr-Latn-RS) meta tag as the first child
    of ``soup.head`` and removes all inline ``style`` attributes. The same
    ``soup`` is returned.
    '''
    language_meta = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>'
    soup.head.insert(0, language_meta)
    for tag in soup.findAll(style=True):
        del tag['style']
    return soup
|
||||
|
||||
language = _('Serbian')
|
@ -5,31 +5,45 @@ __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
novosti.rs
|
||||
'''
|
||||
import string,re
|
||||
|
||||
import re
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Novosti(BasicNewsRecipe):
|
||||
title = 'Vecernje Novosti'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'novosti, vesti, politika, dosije, drustvo, ekonomija, hronika, reportaze, svet, kultura, sport, beograd, regioni, mozaik, feljton, intrvju, pjer, fudbal, kosarka, podvig, arhiva, komentari, kolumne, srbija, republika srpska,Vecernje novosti'
|
||||
title = u'Vecernje Novosti'
|
||||
__author__ = u'Darko Miletic'
|
||||
description = u'Vesti'
|
||||
publisher = 'Kompanija Novosti'
|
||||
category = 'news, politics, Serbia'
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
encoding = 'utf8'
|
||||
remove_javascript = True
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
, '--base-font-size', '10'
|
||||
, '--category', 'news, Serbia'
|
||||
, '--publisher', 'Novosti AD'
|
||||
, '--category', category
|
||||
, '--publisher', publisher
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
|
||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'class':'jednaVest'})]
|
||||
remove_tags_after = dict(name='div', attrs={'class':'info_bottom'})
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class':'info'})
|
||||
,dict(name='div', attrs={'class':'info_bottom'})
|
||||
]
|
||||
remove_tags = [dict(name='div', attrs={'class':['info','info_bottom','clip_div']})]
|
||||
|
||||
feeds = [(u'Vesti', u'http://www.novosti.rs/php/vesti/rss.php')]
|
||||
|
||||
def preprocess_html(self, soup):
    '''
    Prepare a downloaded page for conversion.

    Adds a Content-Language (sr-Latn) meta tag at the front of
    ``soup.head``, deletes the ``style`` attribute wherever it occurs,
    and returns the same ``soup``.
    '''
    content_language = '<meta http-equiv="Content-Language" content="sr-Latn"/>'
    soup.head.insert(0, content_language)
    for element in soup.findAll(style=True):
        del element['style']
    return soup
|
||||
|
||||
language = _('Serbian')
|
@ -6,28 +6,36 @@ __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
||||
nspm.rs
|
||||
'''
|
||||
|
||||
import string,re
|
||||
import re
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Nspm(BasicNewsRecipe):
|
||||
title = u'Nova srpska politicka misao'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'Casopis za politicku teoriju i drustvena istrazivanja'
|
||||
publisher = 'NSPM'
|
||||
category = 'news, politics, Serbia'
|
||||
oldest_article = 7
|
||||
language = _('Serbian')
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
INDEX = 'http://www.nspm.rs/?alphabet=l'
|
||||
cover_url = 'http://nspm.rs/templates/jsn_epic_pro/images/logol.jpg'
|
||||
encoding = 'utf8'
|
||||
remove_javascript = True
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
, '--base-font-size', '10'
|
||||
, '--category', 'news, politics, Serbia'
|
||||
, '--publisher', 'IIC NSPM'
|
||||
, '--category', category
|
||||
, '--publisher', publisher
|
||||
, '--ignore-tables'
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
|
||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||
remove_tags = [dict(name='a')]
|
||||
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
@ -38,3 +46,15 @@ class Nspm(BasicNewsRecipe):
|
||||
|
||||
def print_version(self, url):
    '''
    Return the print-view address for the article at ``url``.

    Every ``.html`` in the URL is rewritten to ``/stampa.html``; a URL
    without ``.html`` comes back unchanged.
    '''
    printable_url = url.replace('.html', '/stampa.html')
    return printable_url
|
||||
|
||||
def preprocess_html(self, soup):
    '''
    Prepare a downloaded page for conversion.

    Sets ``xml:lang`` and ``lang`` on the ``html`` element to
    ``sr-Latn-RS`` and, when the page already has a Content-Language meta
    tag, rewrites its ``content`` to the same value. No meta tag is added
    if none exists. All ``style`` attributes are then removed and the same
    ``soup`` is returned.
    '''
    for attribute in ('xml:lang', 'lang'):
        soup.html[attribute] = 'sr-Latn-RS'
    language_meta = soup.find('meta', attrs={'http-equiv': 'Content-Language'})
    if language_meta:
        language_meta['content'] = 'sr-Latn-RS'
    for styled_tag in soup.findAll(style=True):
        del styled_tag['style']
    return soup
|
||||
|
||||
language = _('Serbian')
|
@ -12,20 +12,24 @@ class OGlobo(BasicNewsRecipe):
|
||||
title = 'O Globo'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'News from Brasil'
|
||||
publisher = 'O Globo'
|
||||
category = 'news, politics, Brasil'
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 100
|
||||
language = _('Spanish')
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
encoding = 'cp1252'
|
||||
cover_url = 'http://oglobo.globo.com/_img/o-globo.png'
|
||||
remove_javascript = True
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
, '--category' , 'news, Brasil'
|
||||
, '--publisher' , title
|
||||
, '--category', category
|
||||
, '--publisher', publisher
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'id':'ltintb'})]
|
||||
|
||||
remove_tags = [
|
||||
@ -56,3 +60,10 @@ class OGlobo(BasicNewsRecipe):
|
||||
,(u'Economia', u'http://oglobo.globo.com/rss/plantaoeconomia.xml')
|
||||
,(u'Tecnologia', u'http://oglobo.globo.com/rss/plantaotecnologia.xml')
|
||||
]
|
||||
|
||||
def preprocess_html(self, soup):
    '''
    Remove every inline ``style`` attribute from the page and return the
    same ``soup``.
    '''
    styled_tags = soup.findAll(style=True)
    for tag in styled_tags:
        del tag['style']
    return soup
|
||||
|
||||
language = _('Portugese')
|
@ -13,11 +13,10 @@ class OutlookIndia(BasicNewsRecipe):
|
||||
|
||||
title = 'Outlook India'
|
||||
__author__ = 'Kovid Goyal'
|
||||
description = 'Weekly news magazine focussed on India.'
|
||||
description = 'Weekly news magazine focused on India.'
|
||||
language = _('English')
|
||||
recursions = 1
|
||||
match_regexp = r'full.asp.*&pn=\d+'
|
||||
html2lrf_options = ['--ignore-tables']
|
||||
|
||||
remove_tags = [
|
||||
dict(name='img', src="images/space.gif"),
|
||||
@ -81,5 +80,8 @@ class OutlookIndia(BasicNewsRecipe):
|
||||
bad.append(table)
|
||||
for b in bad:
|
||||
b.extract()
|
||||
soup = soup.findAll('html')[0]
|
||||
for t in soup.findAll(['table', 'tr', 'td']):
|
||||
t.name = 'div'
|
||||
return soup
|
||||
|
||||
|
@ -1,32 +1,37 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
pagina12.com.ar
|
||||
'''
|
||||
|
||||
from calibre import strftime
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Pagina12(BasicNewsRecipe):
|
||||
title = u'Pagina/12'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'Noticias de Argentina y el resto del mundo'
|
||||
language = _('Spanish')
|
||||
publisher = 'La Pagina S.A.'
|
||||
category = 'news, politics, Argentina'
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
encoding = 'cp1252'
|
||||
cover_url = strftime('http://www.pagina12.com.ar/fotos/%Y%m%d/diario/TAPAN.jpg')
|
||||
remove_javascript = True
|
||||
use_embedded_content = False
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
, '--category' , 'news, Argentina'
|
||||
, '--publisher' , 'La Pagina S.A.'
|
||||
, '--category', category
|
||||
, '--publisher', publisher
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'id':'volver'})
|
||||
@ -38,3 +43,12 @@ class Pagina12(BasicNewsRecipe):
|
||||
|
||||
def print_version(self, url):
    '''
    Return the print-view address for the article at ``url``.

    The site root is swapped for its ``imprimir/`` counterpart; URLs that
    do not contain the site root are returned unchanged.
    '''
    site_root = 'http://www.pagina12.com.ar/'
    print_root = 'http://www.pagina12.com.ar/imprimir/'
    return url.replace(site_root, print_root)
|
||||
|
||||
def preprocess_html(self, soup):
    '''
    Prepare a downloaded page for conversion.

    Inserts a Content-Language (es-AR) meta tag as the first child of
    ``soup.head``, strips inline ``style`` attributes from the whole
    document, and returns the same ``soup``.
    '''
    lang_markup = '<meta http-equiv="Content-Language" content="es-AR"/>'
    soup.head.insert(0, lang_markup)
    for node in soup.findAll(style=True):
        del node['style']
    return soup
|
||||
|
||||
language = _('Spanish')
|
@ -6,30 +6,53 @@ __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
||||
pescanik.net
|
||||
'''
|
||||
|
||||
import string,re
|
||||
import re
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Pescanik(BasicNewsRecipe):
|
||||
title = 'Pescanik'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'Pescanik'
|
||||
publisher = 'Pescanik'
|
||||
category = 'news, politics, Serbia'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
html2lrf_options = ['--base-font-size', '10']
|
||||
html2epub_options = 'base_font_size = "10pt"'
|
||||
remove_javascript = True
|
||||
encoding = 'utf8'
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
, '--category', category
|
||||
, '--publisher', publisher
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
|
||||
cover_url = "http://pescanik.net/templates/ja_teline/images/logo.png"
|
||||
|
||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||
|
||||
remove_tags_after = dict(name='div', attrs={'class':'article_seperator'})
|
||||
|
||||
remove_tags = [dict(name='td' , attrs={'class':'buttonheading'})]
|
||||
remove_tags = [
|
||||
dict(name='td' , attrs={'class':'buttonheading'})
|
||||
,dict(name='span', attrs={'class':'article_seperator'})
|
||||
,dict(name=['object','link'])
|
||||
]
|
||||
|
||||
feeds = [(u'Pescanik Online', u'http://pescanik.net/index.php?option=com_rd_rss&id=12')]
|
||||
|
||||
def print_version(self, url):
    '''
    Return the pop-up print view for the article at ``url``.

    ``/index.php`` is rewritten to ``/index2.php`` regardless of host, and
    the ``pop`` and ``page`` query parameters are appended.
    '''
    # An earlier host-specific replace was assigned to the same variable
    # and overwritten on the next line; only this one ever took effect.
    popup_url = url.replace('/index.php', '/index2.php')
    return popup_url + '&pop=1&page=0'
|
||||
|
||||
def preprocess_html(self, soup):
    '''
    Prepare a downloaded page for conversion.

    Puts a Content-Language (sr-Latn-RS) meta tag first in ``soup.head``
    and removes the ``style`` attribute from every tag that has one.
    Returns the same ``soup``.
    '''
    language_meta = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>'
    soup.head.insert(0, language_meta)
    for styled in soup.findAll(style=True):
        del styled['style']
    return soup
|
||||
|
||||
language = _('Serbian')
|
@ -5,37 +5,61 @@ __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
politika.rs
|
||||
'''
|
||||
import string,re
|
||||
import re
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Politika(BasicNewsRecipe):
|
||||
title = 'Politika Online'
|
||||
title = u'Politika Online'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'Najstariji dnevni list na Balkanu'
|
||||
publisher = 'Politika novine i Magazini d.o.o'
|
||||
category = 'news, politics, Serbia'
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
extra_css = '.content_center_border {text-align: left;}'
|
||||
use_embedded_content = False
|
||||
cover_url = 'http://www.politika.rs:8080/images/politika.gif'
|
||||
remove_javascript = True
|
||||
encoding = 'utf8'
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
, '--base-font-size', '10'
|
||||
, '--category', 'news, Serbia'
|
||||
, '--publisher', 'POLITIKA NOVINE I MAGAZINI d.o.o.'
|
||||
, '--category', category
|
||||
, '--publisher', publisher
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
|
||||
|
||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||
|
||||
keep_only_tags = [ dict(name='div', attrs={'class':'contentcenter'}) ]
|
||||
remove_tags_after = dict(name='div', attrs={'class':'datum_item_details'})
|
||||
keep_only_tags = [dict(name='div', attrs={'class':'content_center_border'})]
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class':['send_print','txt-komentar']})
|
||||
,dict(name=['object','link','a'])
|
||||
,dict(name='h1', attrs={'class':'box_header-tags'})
|
||||
]
|
||||
|
||||
|
||||
feeds = [
|
||||
(u'Politika' , u'http://www.politika.rs/rubrike/Politika/index.1.lt.xml' )
|
||||
,(u'Svet' , u'http://www.politika.rs/rubrike/Svet/index.1.lt.xml' )
|
||||
,(u'Redakcijski komentari', u'http://www.politika.rs/rubrike/redakcijski-komentari/index.1.lt.xml')
|
||||
,(u'Pogledi' , u'http://www.politika.rs/pogledi/index.lt.xml' )
|
||||
,(u'Pogledi sa strane' , u'http://www.politika.rs/rubrike/Pogledi-sa-strane/index.1.lt.xml' )
|
||||
,(u'Tema dana' , u'http://www.politika.rs/rubrike/tema-dana/index.1.lt.xml' )
|
||||
,(u'Kultura' , u'http://www.politika.rs/rubrike/Kultura/index.1.lt.xml' )
|
||||
,(u'Zivot i stil' , u'http://www.politika.rs/rubrike/zivot-i-stil/index.1.lt.xml' )
|
||||
]
|
||||
|
||||
def preprocess_html(self, soup):
    """Tag the document as Serbian (Latin), strip inline styles, left-align.

    Steps, in order:

    1. insert a Content-Language ``<meta>`` string at the start of
       ``soup.head``;
    2. delete the ``style`` attribute from every element returned by
       ``soup.findAll(style=True)``;
    3. set ``align="left"`` on the ``div`` with class
       ``content_center_border`` when ``soup.find`` returns one.

    :param soup: parsed document; must expose ``head``, ``findAll``, ``find``.
    :return: the same ``soup`` object, modified in place.
    """
    mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>'
    head = soup.head
    # A document without a <head> used to raise AttributeError here; skip
    # the meta tag in that case and continue with the clean-up.
    if head is not None:
        head.insert(0,mtag)
    for item in soup.findAll(style=True):
        del item['style']
    ftag = soup.find('div',attrs={'class':'content_center_border'})
    if ftag:
        ftag['align'] = 'left'
    return soup
|
||||
|
@ -17,7 +17,7 @@ class PetersburgTimes(BasicNewsRecipe):
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
language = _('Russian')
|
||||
language = _('English')
|
||||
INDEX = 'http://www.sptimes.ru'
|
||||
|
||||
def parse_index(self):
|
||||
|
59
src/calibre/web/feeds/recipes/recipe_starbulletin.py
Normal file
59
src/calibre/web/feeds/recipes/recipe_starbulletin.py
Normal file
@ -0,0 +1,59 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
starbulletin.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Starbulletin(BasicNewsRecipe):
    """News recipe for the Honolulu Star-Bulletin (starbulletin.com).

    All configuration is declared as class attributes; the only behaviour
    defined here is :meth:`preprocess_html`.
    """

    # -- publication metadata (also reused in the converter options below) --
    title                 = 'Honolulu Star-Bulletin'
    __author__            = 'Darko Miletic'
    description           = "Latest national and local Hawaii sports news"
    publisher             = 'Honolulu Star-Bulletin'
    category              = 'news, Honolulu, Hawaii'

    # -- download settings read by BasicNewsRecipe --
    # NOTE(review): the unit of oldest_article is presumably days -- confirm
    # against the BasicNewsRecipe documentation.
    oldest_article        = 2
    max_articles_per_feed = 100
    # `_` is not defined in this file; presumably the translation function
    # installed as a builtin by calibre -- confirm.
    language              = _('English')
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'utf8'
    remove_javascript     = True
    cover_url             = 'http://media.starbulletin.com/designimages/spacer.gif'

    # Command-line style options built from the metadata above.
    html2lrf_options = [
                          '--comment'  , description
                        , '--category' , category
                        , '--publisher', publisher
                        ]

    # Newline-separated key="value" pairs built from the same metadata.
    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'

    # Selector for the element that is kept; 'storyColoumn' is the id string
    # exactly as this recipe matches it -- do not "correct" the spelling.
    keep_only_tags = [ dict(name='div', attrs={'id':'storyColoumn'}) ]

    # Selectors for elements that are removed.
    remove_tags = [
                     dict(name=['object','link'])
                    ,dict(name='span', attrs={'id':'printdesc'})
                    ,dict(name='div' , attrs={'class':'lightGreyBox storyTools clearAll'})
                    ,dict(name='div' , attrs={'id':'breadcrumbs'})
                  ]

    # (section title, RSS URL) pairs.
    feeds = [
              (u'Headlines' , u'http://www.starbulletin.com/starbulletin_headlines.rss' )
             ,(u'News'      , u'http://www.starbulletin.com/news/index.rss'             )
             ,(u'Sports'    , u'http://www.starbulletin.com/sports/index.rss'           )
             ,(u'Features'  , u'http://www.starbulletin.com/features/index.rss'         )
             ,(u'Editorials', u'http://www.starbulletin.com/editorials/index.rss'       )
             ,(u'Business'  , u'http://www.starbulletin.com/business/index.rss'         )
             ,(u'Travel'    , u'http://www.starbulletin.com/travel/index.rss'           )
            ]

    def preprocess_html(self, soup):
        """Strip inline styles and tag the document as English.

        The ``style`` attribute is deleted from every element returned by
        ``soup.findAll(style=True)``; then a Content-Language ``<meta>``
        string is inserted at the start of ``soup.head``.

        :param soup: parsed document; must expose ``head`` and ``findAll``.
        :return: the same ``soup`` object, modified in place.
        """
        for item in soup.findAll(style=True):
            del item['style']
        mtag = '\n<meta http-equiv="Content-Language" content="en"/>\n'
        head = soup.head
        # A document without a <head> used to raise AttributeError here;
        # skip the meta tag in that case.
        if head is not None:
            head.insert(0,mtag)
        return soup
|
||||
|
@ -1,13 +1,13 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
|
||||
'''
|
||||
vijesti.cg.yu
|
||||
'''
|
||||
|
||||
import string,re
|
||||
import re
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
@ -15,23 +15,35 @@ class Vijesti(BasicNewsRecipe):
|
||||
title = 'Vijesti'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'News from Montenegro'
|
||||
oldest_article = 2
|
||||
publisher = 'Daily Press Vijesti'
|
||||
category = 'news, politics, Montenegro'
|
||||
oldest_article = 1
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
remove_javascript = True
|
||||
encoding = 'cp1250'
|
||||
cover_url = 'http://www.vijesti.cg.yu/img/logo.gif'
|
||||
|
||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||
remove_javascript = True
|
||||
use_embedded_content = False
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
, '--category' , 'news, Montenegro'
|
||||
, '--publisher' , 'Daily Press Vijesti'
|
||||
, '--category', category
|
||||
, '--publisher', publisher
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
|
||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'id':'mainnews'})]
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'align':'right'})
|
||||
,dict(name=['object','link'])
|
||||
]
|
||||
|
||||
feeds = [(u'Sve vijesti', u'http://www.vijesti.cg.yu/rss.php' )]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
@ -39,4 +51,10 @@ class Vijesti(BasicNewsRecipe):
|
||||
soup.html['lang'] = 'sr-Latn-ME'
|
||||
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-ME"/>'
|
||||
soup.head.insert(0,mtag)
|
||||
for item in soup.findAll('img'):
|
||||
if item.has_key('align'):
|
||||
del item['align']
|
||||
item.insert(0,'<br /><br />')
|
||||
return soup
|
||||
|
||||
language = _('Serbian')
|
@ -6,26 +6,34 @@ __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
||||
vreme.com
|
||||
'''
|
||||
|
||||
import string,re
|
||||
import re
|
||||
from calibre import strftime
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Vreme(BasicNewsRecipe):
|
||||
|
||||
title = 'Vreme'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'Politicki Nedeljnik Srbije'
|
||||
publisher = 'Vreme d.o.o.'
|
||||
category = 'news, politics, Serbia'
|
||||
no_stylesheets = True
|
||||
remove_javascript = True
|
||||
needs_subscription = True
|
||||
INDEX = 'http://www.vreme.com'
|
||||
LOGIN = 'http://www.vreme.com/account/index.php'
|
||||
remove_javascript = True
|
||||
use_embedded_content = False
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
, '--base-font-size', '10'
|
||||
, '--category', 'news, politics, Serbia'
|
||||
, '--publisher', 'Vreme d.o.o.'
|
||||
, '--category', category
|
||||
, '--publisher', publisher
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
|
||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||
|
||||
def get_browser(self):
|
||||
@ -67,9 +75,28 @@ class Vreme(BasicNewsRecipe):
|
||||
})
|
||||
return [(soup.head.title.string, articles)]
|
||||
|
||||
remove_tags = [
|
||||
dict(name=['object','link'])
|
||||
,dict(name='table',attrs={'xclass':'image'})
|
||||
]
|
||||
|
||||
def print_version(self, url):
    """Return the print-view URL for an article.

    :param url: article URL as a string; '&print=yes' is appended verbatim,
        so the URL is presumably expected to carry a query string already
        -- confirm against the URLs produced by the index parser.
    :return: ``url`` with '&print=yes' appended.
    """
    return url + '&print=yes'
|
||||
|
||||
def preprocess_html(self, soup):
    """Clean up the print page before conversion.

    Steps, in order:

    1. delete the ``text``, ``bgcolor`` and ``onload`` attributes of
       ``soup.body``;
    2. insert a Content-Language ``<meta>`` string at the start of
       ``soup.head``;
    3. if ``soup.find('td')`` returns a cell, detach that cell and the
       table reached through ``soup.body.table``, then re-insert the cell
       at the start of the body.

    NOTE(review): step 3 presumably unwraps the article text from a layout
    table -- confirm against the live print page.

    :param soup: parsed document; must expose ``head``, ``body``, ``find``.
    :return: the same ``soup`` object, modified in place.
    """
    body = soup.body
    for attr in ('text', 'bgcolor', 'onload'):
        del body[attr]
    mtag = '<meta http-equiv="Content-Language" content="sr-Latn"/>'
    head = soup.head
    # A document without a <head> used to raise AttributeError here.
    if head is not None:
        head.insert(0,mtag)
    # Look both elements up before anything is detached.
    tbl = soup.body.table
    tbbb = soup.find('td')
    if tbbb:
        tbbb.extract()
        # The cell may exist without a table directly reachable from the
        # body; calling extract() on None used to raise in that case.
        if tbl is not None:
            tbl.extract()
        soup.body.insert(0,tbbb)
    return soup
|
||||
|
||||
def get_cover_url(self):
|
||||
cover_url = None
|
||||
soup = self.index_to_soup(self.INDEX)
|
||||
@ -77,3 +104,5 @@ class Vreme(BasicNewsRecipe):
|
||||
if cover_item:
|
||||
cover_url = self.INDEX + cover_item['src']
|
||||
return cover_url
|
||||
|
||||
language = _('Serbian')
|
@ -12,19 +12,7 @@ class WashingtonPost(BasicNewsRecipe):
|
||||
language = _('English')
|
||||
|
||||
|
||||
preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
|
||||
[
|
||||
(r'<HEAD>.*?</HEAD>' , lambda match : '<HEAD></HEAD>'),
|
||||
(r'<div id="apple-rss-sidebar-background">.*?<!-- start Entries -->', lambda match : ''),
|
||||
(r'<!-- end apple-rss-content-area -->.*?</body>', lambda match : '</body>'),
|
||||
(r'<script.*?>.*?</script>', lambda match : ''),
|
||||
(r'<body.*?>.*?.correction {', lambda match : '<body><style>.correction {'),
|
||||
(r'<span class="display:none;" name="pubDate".*?>.*?</body>', lambda match : '<body>'),
|
||||
|
||||
|
||||
]
|
||||
]
|
||||
|
||||
remove_javascript = True
|
||||
|
||||
|
||||
feeds = [ ('Today\'s Highlights', 'http://www.washingtonpost.com/wp-dyn/rss/linkset/2005/03/24/LI2005032400102.xml'),
|
||||
@ -38,8 +26,17 @@ class WashingtonPost(BasicNewsRecipe):
|
||||
('Editorials', 'http://www.washingtonpost.com/wp-dyn/rss/linkset/2005/05/30/LI2005053000331.xml'),
|
||||
]
|
||||
|
||||
remove_tags = [{'id':['pfmnav', 'ArticleCommentsWrapper']}]
|
||||
|
||||
|
||||
def get_article_url(self, article):
    """Return the URL to download for a feed entry.

    The entry's 'feedburner_origlink' value is preferred; when that key is
    absent the 'link' value is used, and ``None`` is returned when neither
    key exists.

    :param article: mapping-like feed entry supporting ``get``.
    :return: URL string, or ``None``.
    """
    return article.get('feedburner_origlink', article.get('link', None))
|
||||
|
||||
def print_version(self, url):
    """Return the printer-friendly URL for an article.

    Everything from the last '.' in ``url`` onward is replaced by
    '_pf.html'. A ``url`` containing no '.' yields just '_pf.html',
    because ``rpartition`` then returns an empty head.

    :param url: article URL as a string.
    :return: the rewritten URL string.
    """
    # The two back-to-back return statements differed only by redundant
    # parentheses; a single one is kept.
    return url.rpartition('.')[0] + '_pf.html'
|
||||
|
||||
def postprocess_html(self, soup, first):
    """Blank the inline style of every ``div`` whose style mentions 'margin'.

    :param soup: parsed document; must expose ``findAll``.
    :param first: not used by this implementation.
    :return: the same ``soup`` object, modified in place.
    """
    # The style is set to an empty string rather than deleted, so the
    # attribute itself stays on the element.
    for div in soup.findAll(name='div', style=re.compile('margin')):
        div['style'] = ''
    return soup
|
||||
|
||||
|
@ -410,6 +410,7 @@ class RecursiveFetcher(object, LoggingInterface):
|
||||
_fname.decode('latin1', 'replace')
|
||||
_fname = _fname.encode('ascii', 'replace').replace('%', '').replace(os.sep, '')
|
||||
_fname = sanitize_file_name(_fname)
|
||||
_fname = os.path.splitext(_fname)[0]+'.xhtml'
|
||||
res = os.path.join(linkdiskpath, _fname)
|
||||
self.downloaded_paths.append(res)
|
||||
self.filemap[nurl] = res
|
||||
|
Loading…
x
Reference in New Issue
Block a user