Sync to trunk

This commit is contained in:
John Schember 2009-02-09 06:38:22 -05:00
commit 373d224738
87 changed files with 21628 additions and 15639 deletions

View File

@ -19,7 +19,7 @@ import mechanize
mimetypes.add_type('application/epub+zip', '.epub')
mimetypes.add_type('text/x-sony-bbeb+xml', '.lrs')
mimetypes.add_type('http://www.w3.org/1999/xhtml', '.xhtml')
mimetypes.add_type('application/xhtml+xml', '.xhtml')
mimetypes.add_type('image/svg+xml', '.svg')
mimetypes.add_type('application/x-sony-bbeb', '.lrf')
mimetypes.add_type('application/x-dtbncx+xml', '.ncx')

View File

@ -2,7 +2,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = 'calibre'
__version__ = '0.4.134'
__version__ = '0.4.135'
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
'''
Various run time constants.

View File

@ -141,6 +141,7 @@ def set_file_type_metadata(stream, mi, ftype):
plugin.set_metadata(stream, mi, ftype.lower().strip())
break
except:
print 'Failed to set metadata for', repr(getattr(mi, 'title', ''))
traceback.print_exc()

View File

@ -58,9 +58,12 @@ class DeviceScanner(object):
return False
def is_device_connected(self, device):
vendor_ids = device.VENDOR_ID if hasattr(device.VENDOR_ID, '__len__') else [device.VENDOR_ID]
product_ids = device.PRODUCT_ID if hasattr(device.PRODUCT_ID, '__len__') else [device.PRODUCT_ID]
if iswindows:
vid, pid = 'vid_%4.4x'%device.VENDOR_ID, 'pid_%4.4x'%device.PRODUCT_ID
vidd, pidd = 'vid_%i'%device.VENDOR_ID, 'pid_%i'%device.PRODUCT_ID
for vendor_id, product_id in zip(vendor_ids, product_ids):
vid, pid = 'vid_%4.4x'%vendor_id, 'pid_%4.4x'%product_id
vidd, pidd = 'vid_%i'%vendor_id, 'pid_%i'%product_id
for device_id in self.devices:
if (vid in device_id or vidd in device_id) and (pid in device_id or pidd in device_id):
if self.test_bcd_windows(device_id, getattr(device, 'BCD', None)):
@ -68,7 +71,7 @@ class DeviceScanner(object):
return True
else:
for vendor, product, bcdDevice in self.devices:
if device.VENDOR_ID == vendor and device.PRODUCT_ID == product:
if vendor in vendor_ids and product in product_ids:
if self.test_bcd(bcdDevice, getattr(device, 'BCD', None)):
if device.can_handle((vendor, product, bcdDevice)):
return True

View File

@ -467,7 +467,7 @@ class Parser(PreProcessor, LoggingInterface):
if self.htmlfile.is_binary:
raise ValueError('Not a valid HTML file: '+self.htmlfile.path)
src = open(self.htmlfile.path, 'rb').read().decode(self.htmlfile.encoding, 'replace').strip()
src = src.replace('\x00', '')
src = src.replace('\x00', '').replace('\r', ' ')
src = self.preprocess(src)
# lxml chokes on unicode input when it contains encoding declarations
for pat in ENCODING_PATS:

View File

@ -917,7 +917,8 @@ class HTMLConverter(object, LoggingInterface):
blockStyle=self.current_block.blockStyle)
def process_image(self, path, tag_css, width=None, height=None, dropcaps=False):
def process_image(self, path, tag_css, width=None, height=None,
dropcaps=False, rescale=False):
def detect_encoding(im):
fmt = im.format
if fmt == 'JPG':
@ -936,10 +937,6 @@ class HTMLConverter(object, LoggingInterface):
return
encoding = detect_encoding(im)
if width == None or height == None:
width, height = im.size
factor = 720./self.profile.dpi
def scale_image(width, height):
if width <= 0:
@ -955,8 +952,15 @@ class HTMLConverter(object, LoggingInterface):
return pt.name
except (IOError, SystemError), err: # PIL chokes on interlaced PNG images as well a some GIF images
self.log_warning(_('Unable to process image %s. Error: %s')%(path, err))
return None
if width == None or height == None:
width, height = im.size
elif rescale and (width < im.size[0] or height < im.size[1]):
path = scale_image(width, height)
if not path:
return
factor = 720./self.profile.dpi
pheight = int(self.current_page.pageStyle.attrs['textheight'])
pwidth = int(self.current_page.pageStyle.attrs['textwidth'])
@ -1518,7 +1522,8 @@ class HTMLConverter(object, LoggingInterface):
except:
pass
dropcaps = tag.has_key('class') and tag['class'] == 'libprs500_dropcaps'
self.process_image(path, tag_css, width, height, dropcaps=dropcaps)
self.process_image(path, tag_css, width, height,
dropcaps=dropcaps, rescale=True)
elif not urlparse(tag['src'])[0]:
self.log_warn('Could not find image: '+tag['src'])
else:

View File

@ -287,7 +287,7 @@ class MetaInformation(object):
ans += [('ISBN', unicode(self.isbn))]
ans += [(_('Tags'), u', '.join([unicode(t) for t in self.tags]))]
if self.series:
ans += [(_('Series'), unicode(self.series))+ ' #%s'%self.format_series_index()]
ans += [(_('Series'), unicode(self.series)+ ' #%s'%self.format_series_index())]
ans += [(_('Language'), unicode(self.language))]
for i, x in enumerate(ans):
ans[i] = u'<tr><td><b>%s</b></td><td>%s</td></tr>'%x

View File

@ -18,7 +18,7 @@ from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
from calibre.ebooks.metadata import get_parser, MetaInformation
from calibre.ebooks.metadata.opf2 import OPF
from calibre.ptempfile import TemporaryDirectory
from calibre import CurrentDir, fit_image
from calibre import CurrentDir
class EPubException(Exception):
pass

View File

@ -38,8 +38,13 @@ def cover_from_isbn(isbn, timeout=5.):
browser = _browser()
_timeout = socket.getdefaulttimeout()
socket.setdefaulttimeout(timeout)
src = None
try:
src = browser.open('http://www.librarything.com/isbn/'+isbn).read().decode('utf-8', 'replace')
except Exception, err:
if isinstance(getattr(err, 'args', [None])[0], socket.timeout):
raise LibraryThingError(_('LibraryThing.com timed out. Try again later.'))
raise
s = BeautifulSoup(src)
url = s.find('td', attrs={'class':'left'})
if url is None:

View File

@ -86,9 +86,9 @@ class MetadataUpdater(object):
image_base, = unpack('>I', record0[108:112])
flags, = unpack('>I', record0[128:132])
have_exth = self.have_exth = (flags & 0x40) != 0
self.cover_record = self.thumbnail_record = None
if not have_exth:
return
self.cover_record = self.thumbnail_record = None
exth_off = unpack('>I', record0[20:24])[0] + 16 + record0.start
exth = self.exth = StreamSlicer(stream, exth_off, record0.stop)
nitems, = unpack('>I', exth[8:12])
@ -143,6 +143,8 @@ class MetadataUpdater(object):
exth = ['EXTH', pack('>II', len(exth) + 12, len(recs)), exth, pad]
exth = ''.join(exth)
title = (mi.title or _('Unknown')).encode(self.codec, 'replace')
if getattr(self, 'exth', None) is None:
raise MobiError('No existing EXTH record. Cannot update metadata.')
title_off = (self.exth.start - self.record0.start) + len(exth)
title_len = len(title)
trail = len(self.exth) - len(exth) - len(title)
@ -151,8 +153,12 @@ class MetadataUpdater(object):
self.exth[:] = ''.join([exth, title, '\0' * trail])
self.record0[84:92] = pack('>II', title_off, title_len)
self.record0[92:96] = iana2mobi(mi.language)
if mi.cover_data[1]:
data = mi.cover_data[1]
if mi.cover_data[1] or mi.cover:
try:
data = mi.cover_data[1] if mi.cover_data[1] else open(mi.cover, 'rb').read()
except:
pass
else:
if self.cover_record is not None:
size = len(self.cover_record)
cover = rescale_image(data, size)

View File

@ -618,7 +618,7 @@ class OPF(object):
def fset(self, val):
remove = list(self.authors_path(self.metadata))
for elem in remove:
self.metadata.remove(elem)
elem.getparent().remove(elem)
for author in val:
attrib = {'{%s}role'%self.NAMESPACES['opf']: 'aut'}
elem = self.create_metadata_element('creator', attrib=attrib)

View File

@ -306,13 +306,15 @@ IANA_MOBI = \
'zu': {None: (53, 0)}}
def iana2mobi(icode):
langdict, subtags = IANA_MOBI[None], []
if icode:
subtags = list(icode.split('-'))
langdict = IANA_MOBI[None]
while len(subtags) > 0:
lang = subtags.pop(0).lower()
if lang in IANA_MOBI:
langdict = IANA_MOBI[lang]
break
mcode = langdict[None]
while len(subtags) > 0:
subtag = subtags.pop(0)

View File

@ -17,6 +17,8 @@ import types
import re
import copy
from itertools import izip
from weakref import WeakKeyDictionary
from xml.dom import SyntaxErr as CSSSyntaxError
import cssutils
from cssutils.css import CSSStyleRule, CSSPageRule, CSSStyleDeclaration, \
CSSValueList, cssproperties
@ -106,7 +108,7 @@ class CSSSelector(etree.XPath):
class Stylizer(object):
STYLESHEETS = {}
STYLESHEETS = WeakKeyDictionary()
def __init__(self, tree, path, oeb, profile=PROFILES['PRS505']):
self.oeb = oeb
@ -131,18 +133,19 @@ class Stylizer(object):
and elem.get('type', CSS_MIME) in OEB_STYLES:
href = urlnormalize(elem.attrib['href'])
path = item.abshref(href)
if path not in oeb.manifest.hrefs:
sitem = oeb.manifest.hrefs.get(path, None)
if sitem is None:
self.logger.warn(
'Stylesheet %r referenced by file %r not in manifest' %
(path, item.href))
continue
if path in self.STYLESHEETS:
stylesheet = self.STYLESHEETS[path]
if sitem in self.STYLESHEETS:
stylesheet = self.STYLESHEETS[sitem]
else:
data = self._fetch_css_file(path)[1]
stylesheet = parser.parseString(data, href=path)
stylesheet.namespaces['h'] = XHTML_NS
self.STYLESHEETS[path] = stylesheet
self.STYLESHEETS[sitem] = stylesheet
stylesheets.append(stylesheet)
rules = []
index = 0
@ -291,10 +294,14 @@ class Style(object):
def _apply_style_attr(self):
attrib = self._element.attrib
if 'style' in attrib:
if 'style' not in attrib:
return
css = attrib['style'].split(';')
css = filter(None, map(lambda x: x.strip(), css))
css = filter(None, (x.strip() for x in css))
try:
style = CSSStyleDeclaration('; '.join(css))
except CSSSyntaxError:
return
self._style.update(self._stylizer.flatten_style(style))
def _has_parent(self):

View File

@ -105,36 +105,6 @@
<string>Book Cover</string>
</property>
<layout class="QGridLayout" name="_2" >
<item row="0" column="0" >
<layout class="QHBoxLayout" name="_3" >
<item>
<widget class="ImageView" name="cover" >
<property name="text" >
<string/>
</property>
<property name="pixmap" >
<pixmap resource="../images.qrc" >:/images/book.svg</pixmap>
</property>
<property name="scaledContents" >
<bool>true</bool>
</property>
<property name="alignment" >
<set>Qt::AlignCenter</set>
</property>
</widget>
</item>
</layout>
</item>
<item row="2" column="0" >
<widget class="QCheckBox" name="opt_prefer_metadata_cover" >
<property name="text" >
<string>Use cover from &amp;source file</string>
</property>
<property name="checked" >
<bool>true</bool>
</property>
</widget>
</item>
<item row="1" column="0" >
<layout class="QVBoxLayout" name="_4" >
<property name="spacing" >
@ -186,6 +156,36 @@
</item>
</layout>
</item>
<item row="2" column="0" >
<widget class="QCheckBox" name="opt_prefer_metadata_cover" >
<property name="text" >
<string>Use cover from &amp;source file</string>
</property>
<property name="checked" >
<bool>true</bool>
</property>
</widget>
</item>
<item row="0" column="0" >
<layout class="QHBoxLayout" name="_3" >
<item>
<widget class="ImageView" name="cover" >
<property name="text" >
<string/>
</property>
<property name="pixmap" >
<pixmap resource="../images.qrc" >:/images/book.svg</pixmap>
</property>
<property name="scaledContents" >
<bool>true</bool>
</property>
<property name="alignment" >
<set>Qt::AlignCenter</set>
</property>
</widget>
</item>
</layout>
</item>
</layout>
<zorder>opt_prefer_metadata_cover</zorder>
<zorder></zorder>
@ -507,6 +507,13 @@
</property>
</widget>
</item>
<item row="9" column="0" >
<widget class="QCheckBox" name="opt_remove_first_image" >
<property name="text" >
<string>Remove &amp;first image from source file</string>
</property>
</widget>
</item>
</layout>
</item>
<item>

Binary file not shown.

After

Width:  |  Height:  |  Size: 983 B

View File

@ -18,6 +18,7 @@ from calibre.library.database2 import FIELD_MAP
from calibre.gui2 import NONE, TableView, qstring_to_unicode, config, \
error_dialog
from calibre.utils.search_query_parser import SearchQueryParser
from calibre.ebooks.metadata.meta import set_metadata as _set_metadata
class LibraryDelegate(QItemDelegate):
COLOR = QColor("blue")
@ -423,7 +424,7 @@ class BooksModel(QAbstractTableModel):
def get_preferred_formats(self, rows, formats, paths=False):
def get_preferred_formats(self, rows, formats, paths=False, set_metadata=False):
ans = []
for row in (row.row() for row in rows):
format = None
@ -441,6 +442,9 @@ class BooksModel(QAbstractTableModel):
pt = PersistentTemporaryFile(suffix='.'+format)
pt.write(self.db.format(row, format))
pt.flush()
if set_metadata:
_set_metadata(pt, self.db.get_metadata(row, get_cover=True),
format)
pt.close() if paths else pt.seek(0)
ans.append(pt)
else:

View File

@ -701,7 +701,7 @@ class Main(MainWindow, Ui_MainWindow):
progress.show()
try:
for c, book in enumerate(paths):
progress.set_value(c)
progress.set_value(c+1)
if progress.canceled:
return
format = os.path.splitext(book)[1]
@ -722,9 +722,11 @@ class Main(MainWindow, Ui_MainWindow):
'cover':self.default_thumbnail, 'tags':[]})
title = mi.title if isinstance(mi.title, unicode) else mi.title.decode(preferred_encoding, 'replace')
progress.set_msg(_('Read metadata from ')+title)
QApplication.processEvents()
if not to_device:
progress.set_msg(_('Adding books to database...'))
QApplication.processEvents()
model = self.library_view.model()
paths = list(paths)
@ -741,6 +743,7 @@ class Main(MainWindow, Ui_MainWindow):
else:
self.upload_books(paths, list(map(sanitize_file_name, names)), infos, on_card=on_card)
finally:
QApplication.processEvents()
progress.hide()
def upload_books(self, files, names, metadata, on_card=False, memory=None):
@ -929,7 +932,8 @@ class Main(MainWindow, Ui_MainWindow):
mi['cover'] = self.cover_to_thumbnail(cdata)
metadata = iter(metadata)
_files = self.library_view.model().get_preferred_formats(rows,
self.device_manager.device_class.FORMATS, paths=True)
self.device_manager.device_class.FORMATS,
paths=True, set_metadata=True)
files = [getattr(f, 'name', None) for f in _files]
bad, good, gf, names, remove_ids = [], [], [], [], []
for f in files:
@ -1223,6 +1227,8 @@ class Main(MainWindow, Ui_MainWindow):
format = 'LRF'
if 'EPUB' in formats:
format = 'EPUB'
if 'MOBI' in formats:
format = 'MOBI'
if not formats:
d = error_dialog(self, _('Cannot view'),
_('%s has no available formats.')%(title,))

View File

@ -16,6 +16,7 @@ from calibre.utils.config import Config, StringConfig
from calibre.gui2.viewer.config_ui import Ui_Dialog
from calibre.gui2.viewer.js import bookmarks, referencing
from calibre.ptempfile import PersistentTemporaryFile
from calibre.constants import iswindows
def load_builtin_fonts():
from calibre.ebooks.lrf.fonts.liberation import LiberationMono_BoldItalic
@ -57,9 +58,12 @@ def config(defaults=None):
help=_('Set the user CSS stylesheet. This can be used to customize the look of all books.'))
fonts = c.add_group('FONTS', _('Font options'))
fonts('serif_family', default='Liberation Serif', help=_('The serif font family'))
fonts('sans_family', default='Liberation Sans', help=_('The sans-serif font family'))
fonts('mono_family', default='Liberation Mono', help=_('The monospaced font family'))
fonts('serif_family', default='Times New Roman' if iswindows else 'Liberation Serif',
help=_('The serif font family'))
fonts('sans_family', default='Verdana' if iswindows else 'Liberation Sans',
help=_('The sans-serif font family'))
fonts('mono_family', default='Courier New' if iswindows else 'Liberation Mono',
help=_('The monospaced font family'))
fonts('default_font_size', default=20, help=_('The standard font size in px'))
fonts('mono_font_size', default=16, help=_('The monospaced font size in px'))
fonts('standard_font', default='serif', help=_('The standard font type'))

View File

@ -4,12 +4,11 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
Backend that implements storage of ebooks in an sqlite database.
'''
import sqlite3 as sqlite
import datetime, re, os, cPickle, traceback, sre_constants
import datetime, re, os, cPickle, sre_constants
from zlib import compress, decompress
from calibre import sanitize_file_name
from calibre.ebooks.metadata.meta import set_metadata, metadata_from_formats
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks import BOOK_EXTENSIONS
from calibre.web.feeds.recipes import migrate_automatic_profile_to_automatic_recipe
@ -1389,76 +1388,6 @@ ALTER TABLE books ADD COLUMN isbn TEXT DEFAULT "" COLLATE NOCASE;
def all_ids(self):
return [i[0] for i in self.conn.get('SELECT id FROM books')]
def export_to_dir(self, dir, indices, byauthor=False, single_dir=False,
index_is_id=False, callback=None):
if not os.path.exists(dir):
raise IOError('Target directory does not exist: '+dir)
by_author = {}
count = 0
for index in indices:
id = index if index_is_id else self.id(index)
au = self.conn.get('SELECT author_sort FROM books WHERE id=?',
(id,), all=False)
if not au:
au = self.authors(index, index_is_id=index_is_id)
if not au:
au = _('Unknown')
au = au.split(',')[0]
if not by_author.has_key(au):
by_author[au] = []
by_author[au].append(index)
for au in by_author.keys():
apath = os.path.join(dir, sanitize_file_name(au))
if not single_dir and not os.path.exists(apath):
os.mkdir(apath)
for idx in by_author[au]:
title = re.sub(r'\s', ' ', self.title(idx, index_is_id=index_is_id))
tpath = os.path.join(apath, sanitize_file_name(title))
id = idx if index_is_id else self.id(idx)
id = str(id)
if not single_dir and not os.path.exists(tpath):
os.mkdir(tpath)
name = au + ' - ' + title if byauthor else title + ' - ' + au
name += '_'+id
base = dir if single_dir else tpath
mi = self.get_metadata(idx, index_is_id=index_is_id)
cover = self.cover(idx, index_is_id=index_is_id)
if cover is not None:
cname = sanitize_file_name(name) + '.jpg'
cpath = os.path.join(base, cname)
open(cpath, 'wb').write(cover)
mi.cover = cname
f = open(os.path.join(base, sanitize_file_name(name)+'.opf'), 'wb')
if not mi.authors:
mi.authors = [_('Unknown')]
opf = OPFCreator(base, mi)
opf.render(f)
f.close()
fmts = self.formats(idx, index_is_id=index_is_id)
if not fmts:
fmts = ''
for fmt in fmts.split(','):
data = self.format(idx, fmt, index_is_id=index_is_id)
if not data:
continue
fname = name +'.'+fmt.lower()
fname = sanitize_file_name(fname)
f = open(os.path.join(base, fname), 'w+b')
f.write(data)
f.flush()
f.seek(0)
try:
set_metadata(f, mi, fmt.lower())
except:
print 'Error setting metadata for book:', mi.title
traceback.print_exc()
f.close()
count += 1
if callable(callback):
if not callback(count, mi.title):
return
@ -1573,42 +1502,6 @@ ALTER TABLE books ADD COLUMN isbn TEXT DEFAULT "" COLLATE NOCASE;
return duplicates
def export_single_format_to_dir(self, dir, indices, format,
index_is_id=False, callback=None):
dir = os.path.abspath(dir)
if not index_is_id:
indices = map(self.id, indices)
failures = []
for count, id in enumerate(indices):
try:
data = self.format(id, format, index_is_id=True)
if not data:
failures.append((id, self.title(id, index_is_id=True)))
continue
except:
failures.append((id, self.title(id, index_is_id=True)))
continue
title = self.title(id, index_is_id=True)
au = self.authors(id, index_is_id=True)
if not au:
au = _('Unknown')
fname = '%s - %s.%s'%(title, au, format.lower())
fname = sanitize_file_name(fname)
if not os.path.exists(dir):
os.makedirs(dir)
f = open(os.path.join(dir, fname), 'w+b')
f.write(data)
f.seek(0)
try:
set_metadata(f, self.get_metadata(id, index_is_id=True), stream_type=format.lower())
except:
pass
f.close()
if callable(callback):
if not callback(count, title):
break
return failures
class SearchToken(object):

View File

@ -19,8 +19,9 @@ from calibre.library import title_sort
from calibre.library.database import LibraryDatabase
from calibre.library.sqlite import connect, IntegrityError
from calibre.utils.search_query_parser import SearchQueryParser
from calibre.ebooks.metadata import string_to_authors, authors_to_string
from calibre.ebooks.metadata.meta import get_metadata
from calibre.ebooks.metadata import string_to_authors, authors_to_string, MetaInformation
from calibre.ebooks.metadata.meta import get_metadata, set_metadata
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.constants import preferred_encoding, iswindows, isosx, filesystem_encoding
from calibre.ptempfile import PersistentTemporaryFile
from calibre.customize.ui import run_plugins_on_import
@ -556,7 +557,8 @@ class LibraryDatabase2(LibraryDatabase):
traceback.print_exc()
continue
def cover(self, index, index_is_id=False, as_file=False, as_image=False):
def cover(self, index, index_is_id=False, as_file=False, as_image=False,
as_path=False):
'''
Return the cover image as a bytestring (in JPEG format) or None.
@ -566,6 +568,8 @@ class LibraryDatabase2(LibraryDatabase):
id = index if index_is_id else self.id(index)
path = os.path.join(self.library_path, self.path(id, index_is_id=True), 'cover.jpg')
if os.access(path, os.R_OK):
if as_path:
return path
f = open(path, 'rb')
if as_image:
img = QImage()
@ -573,6 +577,30 @@ class LibraryDatabase2(LibraryDatabase):
return img
return f if as_file else f.read()
def get_metadata(self, idx, index_is_id=False, get_cover=False):
'''
Convenience method to return metadata as a L{MetaInformation} object.
'''
aum = self.authors(idx, index_is_id=index_is_id)
if aum: aum = [a.strip().replace('|', ',') for a in aum.split(',')]
mi = MetaInformation(self.title(idx, index_is_id=index_is_id), aum)
mi.author_sort = self.author_sort(idx, index_is_id=index_is_id)
mi.comments = self.comments(idx, index_is_id=index_is_id)
mi.publisher = self.publisher(idx, index_is_id=index_is_id)
tags = self.tags(idx, index_is_id=index_is_id)
if tags:
mi.tags = [i.strip() for i in tags.split(',')]
mi.series = self.series(idx, index_is_id=index_is_id)
if mi.series:
mi.series_index = self.series_index(idx, index_is_id=index_is_id)
mi.rating = self.rating(idx, index_is_id=index_is_id)
mi.isbn = self.isbn(idx, index_is_id=index_is_id)
id = idx if index_is_id else self.id(idx)
mi.application_id = id
if get_cover:
mi.cover = self.cover(id, index_is_id=True, as_path=True)
return mi
def has_book(self, mi):
title = mi.title
if title:
@ -1323,4 +1351,106 @@ books_series_link feeds
progress.reset()
return len(books)
def export_to_dir(self, dir, indices, byauthor=False, single_dir=False,
index_is_id=False, callback=None):
if not os.path.exists(dir):
raise IOError('Target directory does not exist: '+dir)
by_author = {}
count = 0
for index in indices:
id = index if index_is_id else self.id(index)
au = self.conn.get('SELECT author_sort FROM books WHERE id=?',
(id,), all=False)
if not au:
au = self.authors(index, index_is_id=index_is_id)
if not au:
au = _('Unknown')
au = au.split(',')[0]
if not by_author.has_key(au):
by_author[au] = []
by_author[au].append(index)
for au in by_author.keys():
apath = os.path.join(dir, sanitize_file_name(au))
if not single_dir and not os.path.exists(apath):
os.mkdir(apath)
for idx in by_author[au]:
title = re.sub(r'\s', ' ', self.title(idx, index_is_id=index_is_id))
tpath = os.path.join(apath, sanitize_file_name(title))
id = idx if index_is_id else self.id(idx)
id = str(id)
if not single_dir and not os.path.exists(tpath):
os.mkdir(tpath)
name = au + ' - ' + title if byauthor else title + ' - ' + au
name += '_'+id
base = dir if single_dir else tpath
mi = self.get_metadata(idx, index_is_id=index_is_id, get_cover=True)
f = open(os.path.join(base, sanitize_file_name(name)+'.opf'), 'wb')
if not mi.authors:
mi.authors = [_('Unknown')]
opf = OPFCreator(base, mi)
opf.render(f)
f.close()
fmts = self.formats(idx, index_is_id=index_is_id)
if not fmts:
fmts = ''
for fmt in fmts.split(','):
data = self.format(idx, fmt, index_is_id=index_is_id)
if not data:
continue
fname = name +'.'+fmt.lower()
fname = sanitize_file_name(fname)
f = open(os.path.join(base, fname), 'w+b')
f.write(data)
f.flush()
f.seek(0)
try:
set_metadata(f, mi, fmt.lower())
except:
pass
f.close()
count += 1
if callable(callback):
if not callback(count, mi.title):
return
def export_single_format_to_dir(self, dir, indices, format,
index_is_id=False, callback=None):
dir = os.path.abspath(dir)
if not index_is_id:
indices = map(self.id, indices)
failures = []
for count, id in enumerate(indices):
try:
data = self.format(id, format, index_is_id=True)
if not data:
failures.append((id, self.title(id, index_is_id=True)))
continue
except:
failures.append((id, self.title(id, index_is_id=True)))
continue
title = self.title(id, index_is_id=True)
au = self.authors(id, index_is_id=True)
if not au:
au = _('Unknown')
fname = '%s - %s.%s'%(title, au, format.lower())
fname = sanitize_file_name(fname)
if not os.path.exists(dir):
os.makedirs(dir)
f = open(os.path.join(dir, fname), 'w+b')
f.write(data)
f.seek(0)
try:
set_metadata(f, self.get_metadata(id, index_is_id=True, get_cover=True),
stream_type=format.lower())
except:
pass
f.close()
if callable(callback):
if not callback(count, title):
break
return failures

View File

@ -236,7 +236,9 @@ Donors per day: %(dpd).2f
ml = mdates.MonthLocator() # every month
fig = plt.figure(1, (8, 4), 96)#, facecolor, edgecolor, frameon, FigureClass)
ax = fig.add_subplot(111)
average = sum(y)/len(y)
ax.bar(x, y, align='center', width=20, color='g')
ax.hlines([average], x[0], x[-1])
ax.xaxis.set_major_locator(ml)
ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %y'))
ax.set_xlim(_months[0].min-timedelta(days=15), _months[-1].min+timedelta(days=15))

View File

@ -30,7 +30,7 @@ class Distribution(object):
('libusb', '0.1.12', None, None, None),
('Qt', '4.4.0', 'qt', 'libqt4-core libqt4-gui', 'qt4'),
('PyQt', '4.4.2', 'PyQt4', 'python-qt4', 'PyQt4'),
('mechanize for python', '0.1.8', 'dev-python/mechanize', 'python-mechanize', 'python-mechanize'),
('mechanize for python', '0.1.11', 'dev-python/mechanize', 'python-mechanize', 'python-mechanize'),
('ImageMagick', '6.3.5', 'imagemagick', 'imagemagick', 'ImageMagick'),
('xdg-utils', '1.0.2', 'xdg-utils', 'xdg-utils', 'xdg-utils'),
('dbus-python', '0.82.2', 'dbus-python', 'python-dbus', 'dbus-python'),

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -91,7 +91,7 @@ class BasicNewsRecipe(object, LoggingInterface):
#: If True stylesheets are not downloaded and processed
no_stylesheets = False
#: Convenient flag to strip all javascripts tags from the downloaded HTML
#: Convenient flag to strip all javascript tags from the downloaded HTML
remove_javascript = True
#: If True the GUI will ask the user for a username and password

View File

@ -26,7 +26,8 @@ recipe_modules = ['recipe_' + r for r in (
'laprensa', 'amspec', 'freakonomics', 'criticadigital', 'elcronista',
'shacknews', 'teleread', 'granma', 'juventudrebelde', 'juventudrebelde_english',
'la_tercera', 'el_mercurio_chile', 'la_cuarta', 'lanacion_chile', 'la_segunda',
'jb_online', 'estadao', 'o_globo', 'vijesti', 'elmundo', 'the_oz', 'exiled',
'jb_online', 'estadao', 'o_globo', 'vijesti', 'elmundo', 'the_oz',
'honoluluadvertiser', 'starbulletin', 'exiled', 'indy_star', 'dna',
)]
import re, imp, inspect, time, os

View File

@ -1,32 +1,39 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
'''
ambito.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Ambito(BasicNewsRecipe):
title = 'Ambito.com'
__author__ = 'Darko Miletic'
description = 'Informacion Libre las 24 horas'
publisher = 'Ambito.com'
category = 'news, politics, Argentina'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'iso--8859-1'
language = _('Spanish')
encoding = 'iso-8859-1'
cover_url = 'http://www.ambito.com/img/logo_.jpg'
remove_javascript = True
use_embedded_content = False
html2lrf_options = [
'--comment' , description
, '--category' , 'news, Argentina'
, '--publisher' , title
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
keep_only_tags = [dict(name='div', attrs={'align':'justify'})]
remove_tags = [dict(name=['object','link'])]
feeds = [
(u'Principales Noticias', u'http://www.ambito.com/rss/noticiasp.asp' )
,(u'Economia' , u'http://www.ambito.com/rss/noticias.asp?S=Econom%EDa' )
@ -43,3 +50,12 @@ class Ambito(BasicNewsRecipe):
def print_version(self, url):
return url.replace('http://www.ambito.com/noticia.asp?','http://www.ambito.com/noticias/imprimir.asp?')
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="es-AR"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Spanish')

View File

@ -7,25 +7,33 @@ b92.net
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class B92(BasicNewsRecipe):
title = u'B92'
title = 'B92'
__author__ = 'Darko Miletic'
language = _('Serbian')
description = 'Dnevne vesti iz Srbije i sveta'
oldest_article = 7
oldest_article = 2
publisher = 'B92.net'
category = 'news, politics, Serbia'
max_articles_per_feed = 100
remove_javascript = True
no_stylesheets = True
use_embedded_content = False
cover_url = 'http://static.b92.net/images/fp/logo.gif'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
keep_only_tags = [ dict(name='div', attrs={'class':'sama_vest'}) ]
html2lrf_options = [
'--comment', description
, '--base-font-size', '10'
, '--category', 'news, Serbia'
, '--publisher', 'B92'
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
feeds = [
(u'Vesti', u'http://www.b92.net/info/rss/vesti.xml')
@ -44,3 +52,16 @@ class B92(BasicNewsRecipe):
if biz:
nurl = 'http://www.b92.net/mobilni/biz/index.php?nav_id=' + article_id
return nurl
def preprocess_html(self, soup):
soup.html['xml:lang'] = 'sr-Latn'
soup.html['lang'] = 'sr-Latn'
mtag = '<meta http-equiv="Content-Language" content="sr-Latn"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
for item in soup.findAll(name='img',align=True):
del item['align']
item.insert(0,'<br /><br />')
return soup
language = _('Serbian')

View File

@ -5,31 +5,49 @@ __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
'''
blic.rs
'''
import string,re
import re
from calibre.web.feeds.news import BasicNewsRecipe
class Blic(BasicNewsRecipe):
title = u'Blic'
__author__ = 'Darko Miletic'
description = 'Blic.rs online verzija najtiraznije novine u Srbiji donosi najnovije vesti iz Srbije i sveta, komentare, politicke analize, poslovne i ekonomske vesti, vesti iz regiona, intervjue, informacije iz kulture, reportaze, pokriva sve sportske dogadjaje, detaljan tv program, nagradne igre, zabavu, fenomenalni Blic strip, dnevni horoskop, arhivu svih dogadjaja'
oldest_article = 7
__author__ = u'Darko Miletic'
description = u'Blic.co.yu online verzija najtiraznije novine u Srbiji donosi najnovije vesti iz Srbije i sveta, komentare, politicke analize, poslovne i ekonomske vesti, vesti iz regiona, intervjue, informacije iz kulture, reportaze, pokriva sve sportske dogadjaje, detaljan tv program, nagradne igre, zabavu, fenomenalni Blic strip, dnevni horoskop, arhivu svih dogadjaja'
publisher = 'RINGIER d.o.o.'
category = 'news, politics, Serbia'
oldest_article = 2
max_articles_per_feed = 100
remove_javascript = True
no_stylesheets = True
use_embedded_content = False
cover_url = 'http://www.blic.rs/resources/images/header_back_tile.png'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
html2lrf_options = [
'--comment', description
, '--base-font-size', '10'
, '--category', 'news, Serbia'
, '--publisher', 'Blic'
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
keep_only_tags = [ dict(name='div', attrs={'class':'single_news'}) ]
keep_only_tags = [dict(name='div', attrs={'class':'single_news'})]
feeds = [ (u'Vesti', u'http://www.blic.rs/rssall.php')]
feeds = [(u'Vesti', u'http://www.blic.rs/rssall.php')]
remove_tags = [dict(name=['object','link'])]
def print_version(self, url):
start_url, question, rest_url = url.partition('?')
return u'http://www.blic.rs/_print.php?' + rest_url
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="sr-Latn"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Serbian')

View File

@ -1,32 +1,36 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
'''
clarin.com
'''
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
class Clarin(BasicNewsRecipe):
title = 'Clarin'
__author__ = 'Darko Miletic'
description = 'Noticias de Argentina y mundo'
publisher = 'Grupo Clarin'
category = 'news, politics, Argentina'
oldest_article = 2
max_articles_per_feed = 100
language = _('Spanish')
use_embedded_content = False
no_stylesheets = True
cover_url = strftime('http://www.clarin.com/diario/%Y/%m/%d/portada.jpg')
remove_javascript = True
html2lrf_options = [
'--comment', description
, '--base-font-size', '10'
, '--category', 'news, Argentina'
, '--publisher', 'Grupo Clarin'
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
remove_tags = [
dict(name='a' , attrs={'class':'Imp' })
,dict(name='div' , attrs={'class':'Perma' })
@ -49,3 +53,12 @@ class Clarin(BasicNewsRecipe):
rest = artl.partition('-0')[-1]
lmain = rest.partition('.')[0]
return 'http://www.servicios.clarin.com/notas/jsp/clarin/v9/notas/imprimir.jsp?pagid=' + lmain
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="es-AR"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Spanish')

View File

@ -5,37 +5,47 @@ __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
'''
danas.rs
'''
import string,re
import re
from calibre.web.feeds.news import BasicNewsRecipe
class Danas(BasicNewsRecipe):
title = 'Danas'
title = u'Danas'
__author__ = 'Darko Miletic'
description = 'Dnevne novine sa vestima iz sveta, politike, ekonomije, kulture, sporta, Beograda, Novog Sada i cele Srbije.'
description = 'Vesti'
publisher = 'Danas d.o.o.'
category = 'news, politics, Serbia'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
no_stylesheets = False
remove_javascript = True
use_embedded_content = False
cover_url = 'http://www.danas.rs/images/basic/danas.gif'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
html2lrf_options = [
'--comment', description
, '--base-font-size', '10'
, '--category', 'news, Serbia'
, '--publisher', 'Danas'
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
keep_only_tags = [ dict(name='div', attrs={'id':'left'}) ]
keep_only_tags = [dict(name='div', attrs={'id':'left'})]
remove_tags = [
dict(name='div', attrs={'class':'width_1_4' })
,dict(name='div', attrs={'class':'metaClanka' })
,dict(name='div', attrs={'id':'comments' })
,dict(name='div', attrs={'class':'baner' })
,dict(name='div', attrs={'class':'slikaClanka'})
dict(name='div', attrs={'class':['width_1_4','metaClanka','baner']})
,dict(name='div', attrs={'id':'comments'})
,dict(name=['object','link'])
]
feeds = [(u'Vesti', u'http://www.danas.rs/rss/rss.asp')]
feeds = [ (u'Vesti', u'http://www.danas.rs/rss/rss.asp')]
def print_version(self, url):
return url + '&action=print'
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="sr-Latn"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Serbian')

View File

@ -9,7 +9,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class DeStandaard(BasicNewsRecipe):
title = u'De Standaard'
__author__ = u'Darko Miletic'
language = _('French')
language = _('Dutch')
description = u'News from Belgium'
oldest_article = 7
max_articles_per_feed = 100

View File

@ -13,6 +13,7 @@ class DeMorganBe(BasicNewsRecipe):
__author__ = u'Darko Miletic'
description = u'News from Belgium'
oldest_article = 7
language = _('Dutch')
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False

View File

@ -0,0 +1,41 @@
'''
dnaindia.com
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class DNAIndia(BasicNewsRecipe):
title = 'DNA India'
description = 'Mumbai news, India news, World news, breaking news'
__author__ = 'Kovid Goyal'
language = _('English')
feeds = [
('Top News', 'http://www.dnaindia.com/syndication/rss_topnews.xml'),
('Popular News', 'http://www.dnaindia.com/syndication/rss_popular.xml'),
('Recent Columns', 'http://www.dnaindia.com/syndication/rss_column.xml'),
('Mumbai', 'http://www.dnaindia.com/syndication/rss,catid-1.xml'),
('India', 'http://www.dnaindia.com/syndication/rss,catid-2.xml'),
('World', 'http://www.dnaindia.com/syndication/rss,catid-9.xml'),
('Money', 'http://www.dnaindia.com/syndication/rss,catid-4.xml'),
('Sports', 'http://www.dnaindia.com/syndication/rss,catid-6.xml'),
('After Hours', 'http://www.dnaindia.com/syndication/rss,catid-7.xml'),
('Digital Life', 'http://www.dnaindia.com/syndication/rss,catid-1089741.xml'),
]
remove_tags = [{'id':'footer'}, {'class':['bottom', 'categoryHead']}]
def print_version(self, url):
match = re.search(r'newsid=(\d+)', url)
if not match:
return url
return 'http://www.dnaindia.com/dnaprint.asp?newsid='+match.group(1)
def postprocess_html(self, soup, first_fetch):
for t in soup.findAll(['table', 'tr', 'td']):
t.name = 'div'
a = soup.find(href='http://www.3dsyndication.com/')
if a is not None:
a.parent.extract()
return soup

View File

@ -5,32 +5,37 @@ __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
emol.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class ElMercurio(BasicNewsRecipe):
title = 'El Mercurio online'
language = _('Spanish')
__author__ = 'Darko Miletic'
description = 'El sitio de noticias online de Chile'
publisher = 'El Mercurio'
category = 'news, politics, Chile'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'cp1252'
cover_url = 'http://www.emol.com/especiales/logo_emol/logo_emol.gif'
remove_javascript = True
use_embedded_content = False
html2lrf_options = [
'--comment' , description
, '--category' , 'news, Chile'
, '--publisher' , title
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
keep_only_tags = [
dict(name='div', attrs={'class':'despliegue-txt_750px'})
,dict(name='div', attrs={'id':'div_cuerpo_participa'})
]
remove_tags = [
dict(name='div', attrs={'class':'contenedor_despliegue-col-left300'})
,dict(name='div', attrs={'id':['div_centro_dn_opc','div_cabezera','div_secciones','div_contenidos','div_pie','nav']})
@ -46,3 +51,11 @@ class ElMercurio(BasicNewsRecipe):
,(u'La Musica', u'http://www.emol.com/rss20/rss.asp?canal=7')
]
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="es-CL"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Spanish')

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
'''
elargentino.com
'''
@ -12,20 +12,24 @@ class ElArgentino(BasicNewsRecipe):
title = 'ElArgentino.com'
__author__ = 'Darko Miletic'
description = 'Informacion Libre las 24 horas'
language = _('Spanish')
publisher = 'ElArgentino.com'
category = 'news, politics, Argentina'
oldest_article = 2
max_articles_per_feed = 100
remove_javascript = True
no_stylesheets = True
use_embedded_content = False
encoding = 'utf8'
cover_url = 'http://www.elargentino.com/TemplateWeb/MediosFooter/tapa_elargentino.png'
html2lrf_options = [
'--comment' , description
, '--category' , 'news, Argentina'
, '--publisher' , 'ElArgentino.com'
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
remove_tags = [
dict(name='div', attrs={'id':'noprint' })
,dict(name='div', attrs={'class':'encabezadoImprimir'})
@ -50,7 +54,10 @@ class ElArgentino(BasicNewsRecipe):
return u'http://www.elargentino.com/Impresion.aspx?Id=' + article_id
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">\n<meta http-equiv="Content-Language" content="es-AR"/>\n'
soup.head.insert(0,mtag)
soup.prettify()
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Spanish')

View File

@ -12,35 +12,49 @@ class ElMundo(BasicNewsRecipe):
title = 'El Mundo'
__author__ = 'Darko Miletic'
description = 'News from Spain'
language = _('Spanish')
publisher = 'El Mundo'
category = 'news, politics, Spain'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'iso8859_15'
cover_url = 'http://estaticos02.cache.el-mundo.net/papel/imagenes/v2.0/logoverde.gif'
remove_javascript = True
html2lrf_options = [
'--comment' , description
, '--category' , 'news, Spain'
, '--publisher' , title
'--comment', description
, '--category', category
, '--publisher', publisher
]
keep_only_tags = [dict(name='div', attrs={'class':'noticia'})]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
keep_only_tags = [
dict(name='div', attrs={'id':['bloqueprincipal','noticia']})
,dict(name='div', attrs={'class':['contenido_noticia_01']})
]
remove_tags = [
dict(name='div', attrs={'class':['herramientas','publicidad_google','video','herramientasarriba','contenido_noticia_02']})
dict(name='div', attrs={'class':['herramientas','publicidad_google']})
,dict(name='div', attrs={'id':'modulo_multimedia' })
,dict(name=['object','script','link', 'a'])
,dict(name='ul', attrs={'class':'herramientas'})
,dict(name='ul', attrs={'class':'herramientas' })
,dict(name=['object','link'])
]
feeds = [
(u'Portada' , u'http://rss.elmundo.es/rss/descarga.htm?data2=4' )
,(u'Television' , u'http://rss.elmundo.es/rss/descarga.htm?data2=76')
,(u'Espana' , u'http://rss.elmundo.es/rss/descarga.htm?data2=8' )
,(u'Internacional' , u'http://rss.elmundo.es/rss/descarga.htm?data2=9' )
,(u'Cultura' , u'http://rss.elmundo.es/rss/descarga.htm?data2=6' )
,(u'Ciencia/Ecologia', u'http://rss.elmundo.es/rss/descarga.htm?data2=5' )
,(u'Comunicacion' , u'http://rss.elmundo.es/rss/descarga.htm?data2=26')
,(u'Television' , u'http://rss.elmundo.es/rss/descarga.htm?data2=76')
]
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Spanish')

View File

@ -8,25 +8,28 @@ estadao.com.br
from calibre.web.feeds.news import BasicNewsRecipe
class Estadao(BasicNewsRecipe):
title = 'O Estado de S. Paulo'
__author__ = 'Darko Miletic'
description = 'News from Brasil'
language = _('Spanish')
description = 'News from Brasil in Portugese'
publisher = 'O Estado de S. Paulo'
category = 'news, politics, Brasil'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'utf8'
cover_url = 'http://www.estadao.com.br/img/logo_estadao.png'
remove_javascript = True
html2lrf_options = [
'--comment' , description
, '--category' , 'news, Brasil'
, '--publisher' , title
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
keep_only_tags = [dict(name='div', attrs={'id':'c1'})]
remove_tags = [
@ -52,4 +55,8 @@ class Estadao(BasicNewsRecipe):
ifr = soup.find('iframe')
if ifr:
ifr.extract()
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Portugese')

View File

@ -7,37 +7,46 @@ granma.cubaweb.cu
'''
import urllib
from calibre.web.feeds.news import BasicNewsRecipe
class Granma(BasicNewsRecipe):
title = 'Diario Granma'
__author__ = 'Darko Miletic'
language = _('Spanish')
description = 'Organo oficial del Comite Central del Partido Comunista de Cuba'
publisher = 'Granma'
category = 'news, politics, Cuba'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'cp1252'
cover_url = 'http://www.granma.cubaweb.cu/imagenes/granweb229d.jpg'
remove_javascript = True
html2lrf_options = [
'--comment' , description
, '--category' , 'news, Cuba'
, '--publisher' , title
'--comment', description
, '--category', category
, '--publisher', publisher
, '--ignore-tables'
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
keep_only_tags = [dict(name='table', attrs={'height':'466'})]
feeds = [(u'Noticias', u'http://www.granma.cubaweb.cu/noticias.xml' )]
def preprocess_html(self, soup):
del soup.body.table['style']
rtag = soup.find('td', attrs={'height':'458'})
if rtag:
del rtag['style']
mtag = '<meta http-equiv="Content-Language" content="es-CU"/>'
soup.head.insert(0,mtag)
for item in soup.findAll('table'):
if item.has_key('width'):
del item['width']
if item.has_key('height'):
del item['height']
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Spanish')

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
'''
harpers.org - paid subscription/ printed issue articles
This recipe only get's article's published in text format
@ -9,13 +9,15 @@ images and pdf's are ignored
'''
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
class Harpers_full(BasicNewsRecipe):
title = u"Harper's Magazine - articles from printed edition"
__author__ = u'Darko Miletic'
description = u"Harper's Magazine: Founded June 1850."
language = _('English')
publisher = "Harpers's"
category = 'news, politics, USA'
oldest_article = 30
max_articles_per_feed = 100
no_stylesheets = True
@ -26,6 +28,15 @@ class Harpers_full(BasicNewsRecipe):
INDEX = strftime('http://www.harpers.org/archive/%Y/%m')
LOGIN = 'http://www.harpers.org'
cover_url = strftime('http://www.harpers.org/media/pages/%Y/%m/gif/0001.gif')
remove_javascript = True
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
keep_only_tags = [ dict(name='div', attrs={'id':'cached'}) ]
remove_tags = [
@ -60,3 +71,10 @@ class Harpers_full(BasicNewsRecipe):
,'description':''
})
return [(soup.head.title.string, articles)]
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('English')

View File

@ -0,0 +1,59 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
honoluluadvertiser.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Honoluluadvertiser(BasicNewsRecipe):
title = 'Honolulu Advertiser'
__author__ = 'Darko Miletic'
description = "Latest national and local Hawaii sports news from The Honolulu Advertiser."
publisher = 'Honolulu Advertiser'
category = 'news, Honolulu, Hawaii'
oldest_article = 2
language = _('English')
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'cp1252'
remove_javascript = True
cover_url = 'http://www.honoluluadvertiser.com/graphics/branding.gif'
html2lrf_options = [
'--comment' , description
, '--category' , category
, '--publisher' , publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
keep_only_tags = [dict(name='td')]
remove_tags = [dict(name=['object','link'])]
feeds = [
(u'Breaking news', u'http://www.honoluluadvertiser.com/apps/pbcs.dll/section?Category=RSS01&MIME=XML' )
,(u'Local news', u'http://www.honoluluadvertiser.com/apps/pbcs.dll/section?Category=RSS02&MIME=XML' )
,(u'Sports', u'http://www.honoluluadvertiser.com/apps/pbcs.dll/section?Category=RSS03&MIME=XML' )
,(u'Island life', u'http://www.honoluluadvertiser.com/apps/pbcs.dll/section?Category=RSS05&MIME=XML' )
,(u'Entertainment', u'http://www.honoluluadvertiser.com/apps/pbcs.dll/section?Category=RSS06&MIME=XML' )
,(u'Business', u'http://www.honoluluadvertiser.com/apps/pbcs.dll/section?Category=RSS04&MIME=XML' )
]
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
mtag = '\n<meta http-equiv="Content-Language" content="en"/>\n'
soup.head.insert(0,mtag)
return soup
def print_version(self, url):
ubody, sep, rest = url.rpartition('/-1/')
root, sep2, article_id = ubody.partition('/article/')
return u'http://www.honoluluadvertiser.com/apps/pbcs.dll/article?AID=/' + article_id + '&template=printart'

View File

@ -0,0 +1,15 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1234144423(BasicNewsRecipe):
title = u'Indianapolis Star'
oldest_article = 5
language = _('English')
__author__ = 'Owen Kelly'
max_articles_per_feed = 100
cover_url = u'http://www2.indystar.com/frontpage/images/today.jpg'
feeds = [(u'Community Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=LOCAL&template=rss&mime=XML'), (u'News Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=NEWS&template=rss&mime=XML'), (u'Business Headlines', u'http://www..indystar.com/apps/pbcs.dll/section?Category=BUSINESS&template=rss&mime=XML'), (u'Sports Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=SPORTS&template=rss&mime=XML'), (u'Lifestyle Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=LIVING&template=rss&mime=XML'), (u'Opinion Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=OPINION&template=rss&mime=XML')]
def print_version(self, url):
return url + '&template=printart'

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
'''
infobae.com
'''
@ -12,21 +12,23 @@ class Infobae(BasicNewsRecipe):
title = 'Infobae.com'
__author__ = 'Darko Miletic'
description = 'Informacion Libre las 24 horas'
publisher = 'Infobae.com'
category = 'news, politics, Argentina'
oldest_article = 2
language = _('Spanish')
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'iso-8859-1'
cover_url = 'http://www.infobae.com/imgs/header/header.gif'
remove_javascript = True
html2lrf_options = [
'--comment' , description
, '--category' , 'news, Argentina'
, '--publisher' , 'Infobae.com'
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
feeds = [
(u'Noticias' , u'http://www.infobae.com/adjuntos/html/RSS/hoy.xml' )
@ -39,3 +41,12 @@ class Infobae(BasicNewsRecipe):
main, sep, article_part = url.partition('contenidos/')
article_id, rsep, rrest = article_part.partition('-')
return u'http://www.infobae.com/notas/nota_imprimir.php?Idx=' + article_id
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">\n<meta http-equiv="Content-Language" content="es-AR"/>\n'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Spanish')

View File

@ -12,20 +12,24 @@ class JBOnline(BasicNewsRecipe):
title = 'Jornal Brasileiro Online'
__author__ = 'Darko Miletic'
description = 'News from Brasil'
publisher = 'Jornal Brasileiro'
category = 'news, politics, Brasil'
oldest_article = 2
language = _('Spanish')
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'cp1252'
cover_url = 'http://jbonline.terra.com.br/img/logo_01.gif'
remove_javascript = True
html2lrf_options = [
'--comment' , description
, '--category' , 'news, Brasil'
, '--publisher' , title
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
keep_only_tags = [dict(name='div', attrs={'id':'corpoNoticia'})]
remove_tags = [dict(name=['script','object','form'])]
@ -36,7 +40,8 @@ class JBOnline(BasicNewsRecipe):
ifr = soup.find('iframe')
if ifr:
ifr.extract()
item = soup.find('div', attrs={'id':'corpoNoticia'})
if item:
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Portugese')

View File

@ -6,28 +6,36 @@ __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
jutarnji.hr
'''
import string, re
import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
class Jutarnji(BasicNewsRecipe):
title = 'Jutarnji'
__author__ = 'Darko Miletic'
description = 'Online izdanje Jutarnjeg lista'
title = u'Jutarnji'
__author__ = u'Darko Miletic'
description = u'Hrvatski portal'
publisher = 'Jutarnji.hr'
category = 'news, politics, Croatia'
oldest_article = 2
max_articles_per_feed = 100
simultaneous_downloads = 1
delay = 1
language = _('Croatian')
no_stylesheets = True
use_embedded_content = False
remove_javascript = True
encoding = 'cp1250'
cover_url = 'http://www.jutarnji.hr/EPHResources/Images/2008/06/05/jhrlogo.png'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
html2lrf_options = [
'--comment', description
, '--base-font-size', '10'
, '--category', 'news, Croatia'
, '--publisher', 'Europapress holding d.o.o.'
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
remove_tags = [
@ -49,11 +57,16 @@ class Jutarnji(BasicNewsRecipe):
def print_version(self, url):
main, split, rest = url.partition('.jl')
rmain, rsplit, rrest = main.rpartition(',')
return u'http://www.jutarnji.hr/ispis_clanka.jl?artid=' + rrest
return 'http://www.jutarnji.hr/ispis_clanka.jl?artid=' + rrest
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
soup.head.insert(0,mtag)
soup.prettify()
mtag = '<meta http-equiv="Content-Language" content="hr"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
for item in soup.findAll(width=True):
del item['width']
return soup

View File

@ -13,21 +13,25 @@ class Juventudrebelde(BasicNewsRecipe):
title = 'Juventud Rebelde'
__author__ = 'Darko Miletic'
description = 'Diario de la Juventud Cubana'
publisher = 'Juventud rebelde'
category = 'news, politics, Cuba'
oldest_article = 2
language = _('Spanish')
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'cp1252'
cover_url = strftime('http://www.juventudrebelde.cu/UserFiles/File/impreso/iportada-%Y-%m-%d.jpg')
remove_javascript = True
html2lrf_options = [
'--comment' , description
, '--category' , 'news, Cuba'
, '--publisher' , title
'--comment', description
, '--category', category
, '--publisher', publisher
, '--ignore-tables'
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
keep_only_tags = [dict(name='div', attrs={'id':'noticia'})]
feeds = [
@ -40,4 +44,11 @@ class Juventudrebelde(BasicNewsRecipe):
,(u'Lectura', u'http://www.juventudrebelde.cu/rss/generales.php?seccion=lectura' )
]
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="es-CU"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Spanish')

View File

@ -5,7 +5,6 @@ __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
'''
juventudrebelde.co.cu
'''
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
@ -13,22 +12,33 @@ class Juventudrebelde_english(BasicNewsRecipe):
title = 'Juventud Rebelde in english'
__author__ = 'Darko Miletic'
description = 'The newspaper of Cuban Youth'
language = _('English')
publisher = 'Juventud Rebelde'
category = 'news, politics, Cuba'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'iso-8859-1'
remove_javascript = True
html2lrf_options = [
'--comment' , description
, '--category' , 'news, Cuba'
, '--publisher' , title
'--comment', description
, '--category', category
, '--publisher', publisher
, '--ignore-tables'
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
keep_only_tags = [dict(name='div', attrs={'class':'read'})]
feeds = [(u'All news', u'http://www.juventudrebelde.cip.cu/rss/all/' )]
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="es-CU"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('English')

View File

@ -11,25 +11,28 @@ from calibre.web.feeds.news import BasicNewsRecipe
class LaCuarta(BasicNewsRecipe):
title = 'La Cuarta'
__author__ = 'Darko Miletic'
description = 'El sitio de noticias online de Chile'
description = 'La Cuarta Cibernetica: El Diario popular'
publisher = 'CODISA, Consorcio Digital S.A.'
category = 'news, politics, entertainment, Chile'
oldest_article = 2
language = _('Spanish')
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'cp1252'
remove_javascript = True
html2lrf_options = [
'--comment' , description
, '--category' , 'news, Chile'
, '--publisher' , title
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
keep_only_tags = [dict(name='div', attrs={'class':'articulo desplegado'}) ]
remove_tags = [
dict(name='script')
,dict(name='ul')
dict(name='ul')
,dict(name='div', attrs={'id':['toolbox','articleImageDisplayer','enviarAmigo']})
,dict(name='div', attrs={'class':['par ad-1','par ad-2']})
,dict(name='input')
@ -37,7 +40,14 @@ class LaCuarta(BasicNewsRecipe):
,dict(name='strong', text='PUBLICIDAD')
]
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="es-CL"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
return soup
feeds = [(u'Noticias', u'http://lacuarta.cl/app/rss?sc=TEFDVUFSVEE=')]
language = _('Spanish')

View File

@ -12,21 +12,24 @@ class LaSegunda(BasicNewsRecipe):
title = 'La Segunda'
__author__ = 'Darko Miletic'
description = 'El sitio de noticias online de Chile'
language = _('Spanish')
publisher = 'La Segunda'
category = 'news, politics, Chile'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'cp1252'
cover_url = 'http://www.lasegunda.com/imagenes/logotipo_lasegunda_Oli.gif'
remove_javascript = True
html2lrf_options = [
'--comment' , description
, '--category' , 'news, Chile'
, '--publisher' , title
, '--ignore-tables'
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
keep_only_tags = [dict(name='table')]
feeds = [
@ -46,3 +49,13 @@ class LaSegunda(BasicNewsRecipe):
rest, sep, article_id = url.partition('index.asp?idnoticia=')
return u'http://www.lasegunda.com/edicionOnline/include/secciones/_detalle_impresion.asp?idnoticia=' + article_id
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="es-CL"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(name='table', width=True):
del item['width']
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Spanish')

View File

@ -12,20 +12,24 @@ class LaTercera(BasicNewsRecipe):
title = 'La Tercera'
__author__ = 'Darko Miletic'
description = 'El sitio de noticias online de Chile'
publisher = 'La Tercera'
category = 'news, politics, Chile'
oldest_article = 2
language = _('Spanish')
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'cp1252'
remove_javascript = True
use_embedded_content = False
html2lrf_options = [
'--comment' , description
, '--category' , 'news, Chile'
, '--publisher' , title
'--comment', description
, '--category', category
, '--publisher', publisher
]
keep_only_tags = [dict(name='div', attrs={'class':'span-16 articulo border'}) ]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
keep_only_tags = [dict(name='div', attrs={'class':['span-16 articulo border','span-16 border','span-16']}) ]
remove_tags = [
dict(name='script')
@ -50,4 +54,11 @@ class LaTercera(BasicNewsRecipe):
,(u'Educacion', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=657')
]
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="es-CL"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Spanish')

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
'''
lanacion.com.ar
'''
@ -11,20 +11,23 @@ from calibre.web.feeds.news import BasicNewsRecipe
class Lanacion(BasicNewsRecipe):
title = 'La Nacion'
__author__ = 'Darko Miletic'
description = 'Informacion actualizada las 24 horas, con noticias de Argentina y del mundo - Informate ya!'
description = 'Noticias de Argentina y el resto del mundo'
publisher = 'La Nacion'
category = 'news, politics, Argentina'
oldest_article = 2
language = _('Spanish')
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
remove_javascript = True
no_stylesheets = True
html2lrf_options = [
'--comment', description
, '--base-font-size', '10'
, '--category', 'news, Argentina'
, '--publisher', 'La Nacion SA'
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
keep_only_tags = [dict(name='div', attrs={'class':'nota floatFix'})]
remove_tags = [
dict(name='div' , attrs={'class':'notaComentario floatFix noprint' })
@ -47,11 +50,11 @@ class Lanacion(BasicNewsRecipe):
,(u'Revista' , u'http://www.lanacion.com.ar/herramientas/rss/index.asp?categoria_id=494' )
]
def get_cover_url(self):
index = 'http://www.lanacion.com.ar'
cover_url = None
soup = self.index_to_soup(index)
cover_item = soup.find('img',attrs={'class':'logo'})
if cover_item:
cover_url = index + cover_item['src']
return cover_url
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="es-AR"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Spanish')

View File

@ -13,20 +13,24 @@ class LaNacionChile(BasicNewsRecipe):
title = 'La Nacion Chile'
__author__ = 'Darko Miletic'
description = 'El sitio de noticias online de Chile'
publisher = 'La Nacion'
category = 'news, politics, Chile'
oldest_article = 2
language = _('Spanish')
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'cp1252'
cover_url = 'http://www.lanacion.cl/prontus_noticias_v2/imag/site/logo.gif'
remove_javascript = True
html2lrf_options = [
'--comment' , description
, '--category' , 'news, Chile'
, '--publisher' , title
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
keep_only_tags = [dict(name='div', attrs={'class':'bloque'})]
feeds = [(u'Noticias', u'http://www.lanacion.cl/rss.xml')]
@ -41,5 +45,10 @@ class LaNacionChile(BasicNewsRecipe):
item = soup.find('a', attrs={'href':'javascript:window.close()'})
if item:
item.extract()
mtag = '<meta http-equiv="Content-Language" content="es-CL"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Spanish')

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
'''
laprensa.com.ar
'''
@ -13,20 +13,24 @@ class LaPrensa(BasicNewsRecipe):
title = 'La Prensa'
__author__ = 'Darko Miletic'
description = 'Informacion Libre las 24 horas'
publisher = 'La Prensa'
category = 'news, politics, Argentina'
oldest_article = 7
language = _('Spanish')
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'cp1252'
cover_url = 'http://www.laprensa.com.ar/imgs/logo.gif'
remove_javascript = True
html2lrf_options = [
'--comment' , description
, '--category' , 'news, Argentina'
, '--publisher' , title
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
feeds = [
(u'Politica' , u'http://www.laprensa.com.ar/Rss.aspx?Rss=4' )
,(u'Economia' , u'http://www.laprensa.com.ar/Rss.aspx?Rss=5' )
@ -47,5 +51,10 @@ class LaPrensa(BasicNewsRecipe):
def preprocess_html(self, soup):
del soup.body['onload']
mtag = '<meta http-equiv="Content-Language" content="es-AR"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Spanish')

View File

@ -7,12 +7,15 @@ nin.co.yu
'''
import re, urllib
from calibre.web.feeds.news import BasicNewsRecipe
class Nin(BasicNewsRecipe):
title = 'NIN online'
__author__ = 'Darko Miletic'
description = 'Nedeljne informativne novine'
publisher = 'NIN'
category = 'news, politics, Serbia'
no_stylesheets = True
oldest_article = 15
simultaneous_downloads = 1
@ -22,12 +25,18 @@ class Nin(BasicNewsRecipe):
PREFIX = 'http://www.nin.co.yu'
INDEX = PREFIX + '/?change_lang=ls'
LOGIN = PREFIX + '/?logout=true'
remove_javascript = True
use_embedded_content = False
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
html2lrf_options = [
'--comment' , description
, '--category' , 'news, politics, Serbia'
, '--publisher' , 'NIN'
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
def get_browser(self):
@ -53,3 +62,12 @@ class Nin(BasicNewsRecipe):
if link_item:
cover_url = self.PREFIX + link_item['src']
return cover_url
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Serbian')

View File

@ -5,31 +5,45 @@ __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
'''
novosti.rs
'''
import string,re
import re
from calibre.web.feeds.news import BasicNewsRecipe
class Novosti(BasicNewsRecipe):
title = 'Vecernje Novosti'
__author__ = 'Darko Miletic'
description = 'novosti, vesti, politika, dosije, drustvo, ekonomija, hronika, reportaze, svet, kultura, sport, beograd, regioni, mozaik, feljton, intrvju, pjer, fudbal, kosarka, podvig, arhiva, komentari, kolumne, srbija, republika srpska,Vecernje novosti'
title = u'Vecernje Novosti'
__author__ = u'Darko Miletic'
description = u'Vesti'
publisher = 'Kompanija Novosti'
category = 'news, politics, Serbia'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'utf8'
remove_javascript = True
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
html2lrf_options = [
'--comment', description
, '--base-font-size', '10'
, '--category', 'news, Serbia'
, '--publisher', 'Novosti AD'
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
keep_only_tags = [ dict(name='div', attrs={'class':'jednaVest'}) ]
remove_tags_after = dict(name='div', attrs={'class':'info_bottom'})
remove_tags = [
dict(name='div', attrs={'class':'info'})
,dict(name='div', attrs={'class':'info_bottom'})
]
keep_only_tags = [dict(name='div', attrs={'class':'jednaVest'})]
remove_tags = [dict(name='div', attrs={'class':['info','info_bottom','clip_div']})]
feeds = [ (u'Vesti', u'http://www.novosti.rs/php/vesti/rss.php')]
feeds = [(u'Vesti', u'http://www.novosti.rs/php/vesti/rss.php')]
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="sr-Latn"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Serbian')

View File

@ -6,35 +6,55 @@ __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
nspm.rs
'''
import string,re
import re
from calibre.web.feeds.news import BasicNewsRecipe
class Nspm(BasicNewsRecipe):
title = u'Nova srpska politicka misao'
__author__ = 'Darko Miletic'
description = 'Casopis za politicku teoriju i drustvena istrazivanja'
publisher = 'NSPM'
category = 'news, politics, Serbia'
oldest_article = 7
language = _('Serbian')
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
INDEX = 'http://www.nspm.rs/?alphabet=l'
cover_url = 'http://nspm.rs/templates/jsn_epic_pro/images/logol.jpg'
encoding = 'utf8'
remove_javascript = True
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
html2lrf_options = [
'--comment', description
, '--base-font-size', '10'
, '--category', 'news, politics, Serbia'
, '--publisher', 'IIC NSPM'
, '--category', category
, '--publisher', publisher
, '--ignore-tables'
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
remove_tags = [dict(name='a')]
def get_browser(self):
br = BasicNewsRecipe.get_browser()
br.open(self.INDEX)
return br
feeds = [ (u'Nova srpska politicka misao', u'http://www.nspm.rs/feed/rss.html')]
feeds = [(u'Nova srpska politicka misao', u'http://www.nspm.rs/feed/rss.html')]
def print_version(self, url):
return url.replace('.html','/stampa.html')
def preprocess_html(self, soup):
soup.html['xml:lang'] = 'sr-Latn-RS'
soup.html['lang'] = 'sr-Latn-RS'
ftag = soup.find('meta',attrs={'http-equiv':'Content-Language'})
if ftag:
ftag['content'] = 'sr-Latn-RS'
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Serbian')

View File

@ -12,20 +12,24 @@ class OGlobo(BasicNewsRecipe):
title = 'O Globo'
__author__ = 'Darko Miletic'
description = 'News from Brasil'
publisher = 'O Globo'
category = 'news, politics, Brasil'
oldest_article = 2
max_articles_per_feed = 100
language = _('Spanish')
no_stylesheets = True
use_embedded_content = False
encoding = 'cp1252'
cover_url = 'http://oglobo.globo.com/_img/o-globo.png'
remove_javascript = True
html2lrf_options = [
'--comment' , description
, '--category' , 'news, Brasil'
, '--publisher' , title
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
keep_only_tags = [dict(name='div', attrs={'id':'ltintb'})]
remove_tags = [
@ -56,3 +60,10 @@ class OGlobo(BasicNewsRecipe):
,(u'Economia', u'http://oglobo.globo.com/rss/plantaoeconomia.xml')
,(u'Tecnologia', u'http://oglobo.globo.com/rss/plantaotecnologia.xml')
]
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Portugese')

View File

@ -13,11 +13,10 @@ class OutlookIndia(BasicNewsRecipe):
title = 'Outlook India'
__author__ = 'Kovid Goyal'
description = 'Weekly news magazine focussed on India.'
description = 'Weekly news magazine focused on India.'
language = _('English')
recursions = 1
match_regexp = r'full.asp.*&pn=\d+'
html2lrf_options = ['--ignore-tables']
remove_tags = [
dict(name='img', src="images/space.gif"),
@ -81,5 +80,8 @@ class OutlookIndia(BasicNewsRecipe):
bad.append(table)
for b in bad:
b.extract()
soup = soup.findAll('html')[0]
for t in soup.findAll(['table', 'tr', 'td']):
t.name = 'div'
return soup

View File

@ -1,32 +1,37 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
'''
pagina12.com.ar
'''
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
class Pagina12(BasicNewsRecipe):
title = u'Pagina/12'
__author__ = 'Darko Miletic'
description = 'Noticias de Argentina y el resto del mundo'
language = _('Spanish')
publisher = 'La Pagina S.A.'
category = 'news, politics, Argentina'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
encoding = 'cp1252'
cover_url = strftime('http://www.pagina12.com.ar/fotos/%Y%m%d/diario/TAPAN.jpg')
remove_javascript = True
use_embedded_content = False
html2lrf_options = [
'--comment' , description
, '--category' , 'news, Argentina'
, '--publisher' , 'La Pagina S.A.'
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
remove_tags = [
dict(name='div', attrs={'id':'volver'})
@ -38,3 +43,12 @@ class Pagina12(BasicNewsRecipe):
def print_version(self, url):
return url.replace('http://www.pagina12.com.ar/','http://www.pagina12.com.ar/imprimir/')
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="es-AR"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Spanish')

View File

@ -6,30 +6,53 @@ __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
pescanik.net
'''
import string,re
import re
from calibre.web.feeds.news import BasicNewsRecipe
class Pescanik(BasicNewsRecipe):
title = 'Pescanik'
__author__ = 'Darko Miletic'
description = 'Pescanik'
publisher = 'Pescanik'
category = 'news, politics, Serbia'
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
html2lrf_options = ['--base-font-size', '10']
html2epub_options = 'base_font_size = "10pt"'
remove_javascript = True
encoding = 'utf8'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
cover_url = "http://pescanik.net/templates/ja_teline/images/logo.png"
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
remove_tags_after = dict(name='div', attrs={'class':'article_seperator'})
remove_tags = [dict(name='td' , attrs={'class':'buttonheading'})]
remove_tags = [
dict(name='td' , attrs={'class':'buttonheading'})
,dict(name='span', attrs={'class':'article_seperator'})
,dict(name=['object','link'])
]
feeds = [(u'Pescanik Online', u'http://pescanik.net/index.php?option=com_rd_rss&id=12')]
def print_version(self, url):
nurl = url.replace('http://pescanik.net/index.php','http://pescanik.net/index2.php')
nurl = url.replace('/index.php','/index2.php')
return nurl + '&pop=1&page=0'
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Serbian')

View File

@ -5,37 +5,61 @@ __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
'''
politika.rs
'''
import string,re
import re
from calibre.web.feeds.news import BasicNewsRecipe
class Politika(BasicNewsRecipe):
title = 'Politika Online'
title = u'Politika Online'
__author__ = 'Darko Miletic'
description = 'Najstariji dnevni list na Balkanu'
publisher = 'Politika novine i Magazini d.o.o'
category = 'news, politics, Serbia'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
extra_css = '.content_center_border {text-align: left;}'
use_embedded_content = False
cover_url = 'http://www.politika.rs:8080/images/politika.gif'
remove_javascript = True
encoding = 'utf8'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
html2lrf_options = [
'--comment', description
, '--base-font-size', '10'
, '--category', 'news, Serbia'
, '--publisher', 'POLITIKA NOVINE I MAGAZINI d.o.o.'
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
keep_only_tags = [ dict(name='div', attrs={'class':'contentcenter'}) ]
remove_tags_after = dict(name='div', attrs={'class':'datum_item_details'})
keep_only_tags = [dict(name='div', attrs={'class':'content_center_border'})]
remove_tags = [
dict(name='div', attrs={'class':['send_print','txt-komentar']})
,dict(name=['object','link','a'])
,dict(name='h1', attrs={'class':'box_header-tags'})
]
feeds = [
(u'Politika' , u'http://www.politika.rs/rubrike/Politika/index.1.lt.xml' )
,(u'Svet' , u'http://www.politika.rs/rubrike/Svet/index.1.lt.xml' )
,(u'Redakcijski komentari', u'http://www.politika.rs/rubrike/redakcijski-komentari/index.1.lt.xml')
,(u'Pogledi' , u'http://www.politika.rs/pogledi/index.lt.xml' )
,(u'Pogledi sa strane' , u'http://www.politika.rs/rubrike/Pogledi-sa-strane/index.1.lt.xml' )
,(u'Tema dana' , u'http://www.politika.rs/rubrike/tema-dana/index.1.lt.xml' )
,(u'Kultura' , u'http://www.politika.rs/rubrike/Kultura/index.1.lt.xml' )
,(u'Zivot i stil' , u'http://www.politika.rs/rubrike/zivot-i-stil/index.1.lt.xml' )
]
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-RS"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
ftag = soup.find('div',attrs={'class':'content_center_border'})
if ftag:
ftag['align'] = 'left'
return soup

View File

@ -17,7 +17,7 @@ class PetersburgTimes(BasicNewsRecipe):
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
language = _('Russian')
language = _('English')
INDEX = 'http://www.sptimes.ru'
def parse_index(self):

View File

@ -0,0 +1,59 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
starbulletin.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Starbulletin(BasicNewsRecipe):
title = 'Honolulu Star-Bulletin'
__author__ = 'Darko Miletic'
description = "Latest national and local Hawaii sports news"
publisher = 'Honolulu Star-Bulletin'
category = 'news, Honolulu, Hawaii'
oldest_article = 2
max_articles_per_feed = 100
language = _('English')
no_stylesheets = True
use_embedded_content = False
encoding = 'utf8'
remove_javascript = True
cover_url = 'http://media.starbulletin.com/designimages/spacer.gif'
html2lrf_options = [
'--comment' , description
, '--category' , category
, '--publisher' , publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
keep_only_tags = [ dict(name='div', attrs={'id':'storyColoumn'}) ]
remove_tags = [
dict(name=['object','link'])
,dict(name='span', attrs={'id':'printdesc'})
,dict(name='div' , attrs={'class':'lightGreyBox storyTools clearAll'})
,dict(name='div' , attrs={'id':'breadcrumbs'})
]
feeds = [
(u'Headlines', u'http://www.starbulletin.com/starbulletin_headlines.rss' )
,(u'News', u'http://www.starbulletin.com/news/index.rss' )
,(u'Sports', u'http://www.starbulletin.com/sports/index.rss' )
,(u'Features', u'http://www.starbulletin.com/features/index.rss' )
,(u'Editorials', u'http://www.starbulletin.com/editorials/index.rss' )
,(u'Business', u'http://www.starbulletin.com/business/index.rss' )
,(u'Travel', u'http://www.starbulletin.com/travel/index.rss' )
]
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
mtag = '\n<meta http-equiv="Content-Language" content="en"/>\n'
soup.head.insert(0,mtag)
return soup

View File

@ -1,13 +1,13 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
vijesti.cg.yu
'''
import string,re
import re
from calibre.web.feeds.news import BasicNewsRecipe
@ -15,23 +15,35 @@ class Vijesti(BasicNewsRecipe):
title = 'Vijesti'
__author__ = 'Darko Miletic'
description = 'News from Montenegro'
oldest_article = 2
publisher = 'Daily Press Vijesti'
category = 'news, politics, Montenegro'
oldest_article = 1
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
remove_javascript = True
encoding = 'cp1250'
cover_url = 'http://www.vijesti.cg.yu/img/logo.gif'
remove_javascript = True
use_embedded_content = False
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
html2lrf_options = [
'--comment' , description
, '--category' , 'news, Montenegro'
, '--publisher' , 'Daily Press Vijesti'
]
keep_only_tags = [dict(name='div', attrs={'id':'mainnews'})]
remove_tags = [
dict(name='div', attrs={'align':'right'})
,dict(name=['object','link'])
]
feeds = [(u'Sve vijesti', u'http://www.vijesti.cg.yu/rss.php' )]
def preprocess_html(self, soup):
@ -39,4 +51,10 @@ class Vijesti(BasicNewsRecipe):
soup.html['lang'] = 'sr-Latn-ME'
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-ME"/>'
soup.head.insert(0,mtag)
for item in soup.findAll('img'):
if item.has_key('align'):
del item['align']
item.insert(0,'<br /><br />')
return soup
language = _('Serbian')

View File

@ -6,26 +6,34 @@ __copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
vreme.com
'''
import string,re
import re
from calibre import strftime
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.web.feeds.news import BasicNewsRecipe
class Vreme(BasicNewsRecipe):
title = 'Vreme'
__author__ = 'Darko Miletic'
description = 'Politicki Nedeljnik Srbije'
publisher = 'Vreme d.o.o.'
category = 'news, politics, Serbia'
no_stylesheets = True
remove_javascript = True
needs_subscription = True
INDEX = 'http://www.vreme.com'
LOGIN = 'http://www.vreme.com/account/index.php'
remove_javascript = True
use_embedded_content = False
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "monospace1";src:url(res:///opt/sony/ebook/FONT/tt0419m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: left; font-family: serif1, serif} .article_date{font-family: monospace1, monospace} .article_description{font-family: sans1, sans-serif} .navbar{font-family: monospace1, monospace}'
html2lrf_options = [
'--comment', description
, '--base-font-size', '10'
, '--category', 'news, politics, Serbia'
, '--publisher', 'Vreme d.o.o.'
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
def get_browser(self):
@ -67,9 +75,28 @@ class Vreme(BasicNewsRecipe):
})
return [(soup.head.title.string, articles)]
remove_tags = [
dict(name=['object','link'])
,dict(name='table',attrs={'xclass':'image'})
]
def print_version(self, url):
return url + '&print=yes'
def preprocess_html(self, soup):
del soup.body['text' ]
del soup.body['bgcolor']
del soup.body['onload' ]
mtag = '<meta http-equiv="Content-Language" content="sr-Latn"/>'
soup.head.insert(0,mtag)
tbl = soup.body.table
tbbb = soup.find('td')
if tbbb:
tbbb.extract()
tbl.extract()
soup.body.insert(0,tbbb)
return soup
def get_cover_url(self):
cover_url = None
soup = self.index_to_soup(self.INDEX)
@ -77,3 +104,5 @@ class Vreme(BasicNewsRecipe):
if cover_item:
cover_url = self.INDEX + cover_item['src']
return cover_url
language = _('Serbian')

View File

@ -12,19 +12,7 @@ class WashingtonPost(BasicNewsRecipe):
language = _('English')
preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
[
(r'<HEAD>.*?</HEAD>' , lambda match : '<HEAD></HEAD>'),
(r'<div id="apple-rss-sidebar-background">.*?<!-- start Entries -->', lambda match : ''),
(r'<!-- end apple-rss-content-area -->.*?</body>', lambda match : '</body>'),
(r'<script.*?>.*?</script>', lambda match : ''),
(r'<body.*?>.*?.correction {', lambda match : '<body><style>.correction {'),
(r'<span class="display:none;" name="pubDate".*?>.*?</body>', lambda match : '<body>'),
]
]
remove_javascript = True
feeds = [ ('Today\'s Highlights', 'http://www.washingtonpost.com/wp-dyn/rss/linkset/2005/03/24/LI2005032400102.xml'),
@ -38,8 +26,17 @@ class WashingtonPost(BasicNewsRecipe):
('Editorials', 'http://www.washingtonpost.com/wp-dyn/rss/linkset/2005/05/30/LI2005053000331.xml'),
]
remove_tags = [{'id':['pfmnav', 'ArticleCommentsWrapper']}]
def get_article_url(self, article):
return article.get('feedburner_origlink', article.get('link', None))
def print_version(self, url):
return (url.rpartition('.')[0] + '_pf.html')
return url.rpartition('.')[0] + '_pf.html'
def postprocess_html(self, soup, first):
for div in soup.findAll(name='div', style=re.compile('margin')):
div['style'] = ''
return soup

View File

@ -410,6 +410,7 @@ class RecursiveFetcher(object, LoggingInterface):
_fname.decode('latin1', 'replace')
_fname = _fname.encode('ascii', 'replace').replace('%', '').replace(os.sep, '')
_fname = sanitize_file_name(_fname)
_fname = os.path.splitext(_fname)[0]+'.xhtml'
res = os.path.join(linkdiskpath, _fname)
self.downloaded_paths.append(res)
self.filemap[nurl] = res