Pull from trunk

This commit is contained in:
Kovid Goyal 2009-02-25 10:17:13 -08:00
commit a28bb69bb3
47 changed files with 6652 additions and 5975 deletions

View File

@ -36,6 +36,7 @@ def freeze():
'/lib/libbz2.so.1',
'/usr/lib/libpoppler.so.4',
'/usr/lib/libxml2.so.2',
'/usr/lib/libdbus-1.so.3',
'/usr/lib/libxslt.so.1',
'/usr/lib/libxslt.so.1',
'/usr/lib/libgthread-2.0.so.0',

View File

@ -2,7 +2,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = 'calibre'
__version__ = '0.4.139'
__version__ = '0.4.140'
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
'''
Various run time constants.

View File

@ -11,7 +11,8 @@ def devices():
from calibre.devices.prs700.driver import PRS700
from calibre.devices.cybookg3.driver import CYBOOKG3
from calibre.devices.kindle.driver import KINDLE
return (PRS500, PRS505, PRS700, CYBOOKG3, KINDLE)
from calibre.devices.kindle.driver import KINDLE2
return (PRS500, PRS505, PRS700, CYBOOKG3, KINDLE, KINDLE2)
import time

View File

@ -150,10 +150,13 @@ class Device(object):
the device.
@param locations: Result of a call to L{upload_books}
@param metadata: List of dictionaries. Each dictionary must have the
keys C{title}, C{authors}, C{cover}, C{tags}. The value of the C{cover}
keys C{title}, C{authors}, C{author_sort}, C{cover}, C{tags}.
The value of the C{cover}
element can be None or a three element tuple (width, height, data)
where data is the image data in JPEG format as a string. C{tags} must be
a possibly empty list of strings. C{authors} must be a string.
C{author_sort} may be None. It is up to the driver to decide whether to
use C{author_sort} or not.
The dictionary can also have an optional key "tag order" which should be
another dictionary that maps tag names to lists of book ids. The ids are
ids from the book database.

View File

@ -4,13 +4,13 @@ __copyright__ = '2009, John Schember <john at nachtimwald.com>'
Device driver for Amazon's Kindle
'''
import os, fnmatch
import os
from calibre.devices.usbms.driver import USBMS
class KINDLE(USBMS):
# Ordered list of supported formats
FORMATS = ['azw', 'mobi', 'prc', 'txt']
FORMATS = ['azw', 'mobi', 'prc', 'azw1', 'tpz', 'txt']
VENDOR_ID = [0x1949]
PRODUCT_ID = [0x0001]
@ -35,10 +35,13 @@ class KINDLE(USBMS):
if os.path.exists(path):
os.unlink(path)
filepath, ext = os.path.splitext(path)
basepath, filename = os.path.split(filepath)
filepath = os.path.splitext(path)[0]
# Delete the ebook auxiliary file
if os.path.exists(filepath + '.mbp'):
os.unlink(filepath + '.mbp')
class KINDLE2(KINDLE):
    """Variant of the KINDLE driver that only overrides USB identifiers."""

    # Product id(s) this driver matches; the vendor id is inherited.
    PRODUCT_ID = [0x0002]
    # NOTE(review): presumably the USB bcdDevice revision(s) -- confirm
    # against the USBMS base class.
    BCD = [0x0100]

View File

@ -55,7 +55,7 @@ class Book(object):
title = book_metadata_field("title")
authors = book_metadata_field("author", \
formatter=lambda x: x if x and x.strip() else "Unknown")
formatter=lambda x: x if x and x.strip() else _('Unknown'))
mime = book_metadata_field("mime")
rpath = book_metadata_field("path")
id = book_metadata_field("id", formatter=int)
@ -193,7 +193,7 @@ class BookList(_BookList):
attrs = {
"title" : info["title"],
'titleSorter' : sortable_title(info['title']),
"author" : info["authors"] if info['authors'] else 'Unknown', \
"author" : info["authors"] if info['authors'] else _('Unknown'),
"page":"0", "part":"0", "scale":"0", \
"sourceid":sourceid, "id":str(cid), "date":"", \
"mime":mime, "path":name, "size":str(size)

View File

@ -98,7 +98,7 @@ class EbookIterator(object):
url = re.compile(r'url\s*\([\'"]*(.+?)[\'"]*\)', re.DOTALL).search(block)
if url:
path = url.group(1).split('/')
path = os.path.join(os.path.dirname(item.path), *path)
path = os.path.join(os.path.dirname(item.path), *path)
id = QFontDatabase.addApplicationFont(path)
if id != -1:
families = [unicode(f) for f in QFontDatabase.applicationFontFamilies(id)]
@ -106,6 +106,8 @@ class EbookIterator(object):
family = family.group(1).strip().replace('"', '')
if family not in families:
print 'WARNING: Family aliasing not supported:', block
else:
print 'Loaded embedded font:', repr(family)
def __enter__(self):
self._tdir = TemporaryDirectory('_ebook_iter')

View File

@ -284,7 +284,11 @@ class UnBinary(object):
state = 'get attr'
elif count > 0:
if not in_censorship:
buf.write(encode(c))
if c == '"':
c = '&quot;'
elif c == '<':
c = '&lt;'
self.buf.write(c.encode('ascii', 'xmlcharrefreplace'))
count -= 1
if count == 0:
if not in_censorship:

View File

@ -3,6 +3,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
''''''
import sys, os, subprocess, logging
import errno
from functools import partial
from calibre import isosx, setup_cli_handlers, filename_to_utf8, iswindows, islinux
from calibre.ebooks import ConversionError, DRMError
@ -41,14 +42,26 @@ def generate_html(pathtopdf, tdir):
try:
os.chdir(tdir)
try:
p = popen(cmd, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
p = popen(cmd, stderr=subprocess.PIPE)
except OSError, err:
if err.errno == 2:
raise ConversionError(_('Could not find pdftohtml, check it is in your PATH'), True)
else:
raise
'''
print p.stdout.read()
ret = p.wait()
'''
while True:
try:
ret = p.wait()
break
except OSError, e:
if e.errno == errno.EINTR:
continue
else:
raise
if ret != 0:
err = p.stderr.read()
raise ConversionError, err

View File

@ -18,7 +18,7 @@ from calibre.ebooks.chardet import xml_to_unicode
from calibre import relpath
from calibre.constants import __appname__, __version__
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata import MetaInformation, string_to_authors
class Resource(object):
@ -614,7 +614,7 @@ class OPF(object):
def fget(self):
ans = []
for elem in self.authors_path(self.metadata):
ans.extend([x.strip() for x in self.get_text(elem).split(',')])
ans.extend(string_to_authors(self.get_text(elem)))
return ans
def fset(self, val):
@ -624,8 +624,8 @@ class OPF(object):
for author in val:
attrib = {'{%s}role'%self.NAMESPACES['opf']: 'aut'}
elem = self.create_metadata_element('creator', attrib=attrib)
self.set_text(elem, author)
self.set_text(elem, author.strip())
return property(fget=fget, fset=fset)
@dynamic_property

View File

@ -505,7 +505,7 @@ def get_metadata(stream):
except:
import traceback
traceback.print_exc()
return mi
return mi
def option_parser():

View File

@ -37,17 +37,20 @@ class KeyMapper(object):
@staticmethod
def relate(size, base):
if size == 0:
return base
size = float(size)
base = float(base)
if abs(size - base) < 0.1: return 0
sign = -1 if size < base else 1
endp = 0 if size < base else 36
diff = (abs(base - size) * 3) + ((36 - size) / 100)
logb = abs(base - endp)
logb = abs(base - endp)
result = sign * math.log(diff, logb)
return result
def __getitem__(self, ssize):
ssize = asfloat(ssize, 0)
if ssize in self.cache:
return self.cache[ssize]
dsize = self.map(ssize)
@ -66,6 +69,7 @@ class ScaleMapper(object):
self.dscale = float(dbase) / float(sbase)
def __getitem__(self, ssize):
    """Return ``asfloat(ssize, 0)`` multiplied by ``self.dscale``."""
    return asfloat(ssize, 0) * self.dscale

View File

@ -90,19 +90,25 @@ class AddFiles(Add):
def run(self):
self.canceled = False
for c, book in enumerate(self.paths):
if self.pd.canceled:
self.canceled = True
break
format = os.path.splitext(book)[1]
format = format[1:] if format else None
stream = open(book, 'rb')
self.formats.append(format)
self.names.append(os.path.basename(book))
self.get_metadata(c, stream, stream_type=format,
use_libprs_metadata=True)
self.wait_for_condition()
try:
self.canceled = False
for c, book in enumerate(self.paths):
if self.pd.canceled:
self.canceled = True
break
format = os.path.splitext(book)[1]
format = format[1:] if format else None
stream = open(book, 'rb')
self.formats.append(format)
self.names.append(os.path.basename(book))
self.get_metadata(c, stream, stream_type=format,
use_libprs_metadata=True)
self.wait_for_condition()
finally:
self.disconnect(self.get_metadata,
SIGNAL('metadata(PyQt_PyObject, PyQt_PyObject)'),
self.metadata_delivered)
self.get_metadata = None
def process_duplicates(self):
@ -178,34 +184,40 @@ class AddRecursive(Add):
def run(self):
root = os.path.abspath(self.path)
for dirpath in os.walk(root):
if self.is_canceled():
return
self.emit(SIGNAL('update(PyQt_PyObject)'),
_('Searching in')+' '+dirpath[0])
self.books += list(self.db.find_books_in_directory(dirpath[0],
self.single_book_per_directory))
self.books = [formats for formats in self.books if formats]
# Reset progress bar
self.emit(SIGNAL('searching_done()'))
for c, formats in enumerate(self.books):
self.get_metadata.from_formats(c, formats)
self.wait_for_condition()
try:
root = os.path.abspath(self.path)
for dirpath in os.walk(root):
if self.is_canceled():
return
self.emit(SIGNAL('update(PyQt_PyObject)'),
_('Searching in')+' '+dirpath[0])
self.books += list(self.db.find_books_in_directory(dirpath[0],
self.single_book_per_directory))
self.books = [formats for formats in self.books if formats]
# Reset progress bar
self.emit(SIGNAL('searching_done()'))
for c, formats in enumerate(self.books):
self.get_metadata.from_formats(c, formats)
self.wait_for_condition()
# Add books to database
for c, x in enumerate(self.metadata):
mi, formats = x
if self.is_canceled():
break
if self.db.has_book(mi):
self.duplicates.append((mi, formats))
else:
self.db.import_book(mi, formats, notify=False)
self.number_of_books_added += 1
self.emit(SIGNAL('pupdate(PyQt_PyObject)'), c)
finally:
self.disconnect(self.get_metadata,
SIGNAL('metadataf(PyQt_PyObject, PyQt_PyObject)'),
self.metadata_delivered)
self.get_metadata = None
# Add books to database
for c, x in enumerate(self.metadata):
mi, formats = x
if self.is_canceled():
break
if self.db.has_book(mi):
self.duplicates.append((mi, formats))
else:
self.db.import_book(mi, formats, notify=False)
self.number_of_books_added += 1
self.emit(SIGNAL('pupdate(PyQt_PyObject)'), c)
def process_duplicates(self):
if self.duplicates:

View File

@ -180,11 +180,12 @@ class ConfigDialog(QDialog, Ui_Dialog):
self.toolbar_button_size.setCurrentIndex(0 if icons == self.ICON_SIZES[0] else 1 if icons == self.ICON_SIZES[1] else 2)
self.show_toolbar_text.setChecked(config['show_text_in_toolbar'])
for ext in BOOK_EXTENSIONS:
book_exts = sorted(BOOK_EXTENSIONS)
for ext in book_exts:
self.single_format.addItem(ext.upper(), QVariant(ext))
single_format = config['save_to_disk_single_format']
self.single_format.setCurrentIndex(BOOK_EXTENSIONS.index(single_format))
self.single_format.setCurrentIndex(book_exts.index(single_format))
self.cover_browse.setValue(config['cover_flow_queue_length'])
self.systray_notifications.setChecked(not config['disable_tray_notification'])
from calibre.translations.compiled import translations
@ -203,7 +204,7 @@ class ConfigDialog(QDialog, Ui_Dialog):
self.pdf_metadata.setChecked(prefs['read_file_metadata'])
added_html = False
for ext in BOOK_EXTENSIONS:
for ext in book_exts:
ext = ext.lower()
ext = re.sub(r'(x{0,1})htm(l{0,1})', 'html', ext)
if ext == 'lrf' or is_supported('book.'+ext):

Binary file not shown.

After

Width:  |  Height:  |  Size: 365 B

View File

@ -20,6 +20,7 @@ from calibre.gui2 import NONE, TableView, qstring_to_unicode, config, \
error_dialog
from calibre.utils.search_query_parser import SearchQueryParser
from calibre.ebooks.metadata.meta import set_metadata as _set_metadata
from calibre.ebooks.metadata import string_to_authors
class LibraryDelegate(QItemDelegate):
COLOR = QColor("blue")
@ -364,12 +365,13 @@ class BooksModel(QAbstractTableModel):
return data
def get_metadata(self, rows, rows_are_ids=False):
metadata = []
def get_metadata(self, rows, rows_are_ids=False, full_metadata=False):
metadata, _full_metadata = [], []
if not rows_are_ids:
rows = [self.db.id(row.row()) for row in rows]
for id in rows:
mi = self.db.get_metadata(id, index_is_id=True)
_full_metadata.append(mi)
au = authors_to_string(mi.authors if mi.authors else [_('Unknown')])
tags = mi.tags if mi.tags else []
if mi.series is not None:
@ -377,6 +379,7 @@ class BooksModel(QAbstractTableModel):
info = {
'title' : mi.title,
'authors' : au,
'author_sort' : mi.author_sort,
'cover' : self.db.cover(id, index_is_id=True),
'tags' : tags,
'comments': mi.comments,
@ -387,7 +390,10 @@ class BooksModel(QAbstractTableModel):
}
metadata.append(info)
return metadata
if full_metadata:
return metadata, _full_metadata
else:
return metadata
def get_preferred_formats_from_ids(self, ids, all_formats, mode='r+b'):
ans = []
@ -928,12 +934,8 @@ class DeviceBooksModel(BooksModel):
au = self.unknown
if role == Qt.EditRole:
return QVariant(au)
au = au.split(',')
authors = []
for i in au:
authors += i.strip().split('&')
jau = [ a.strip() for a in authors ]
return QVariant("\n".join(jau))
authors = string_to_authors(au)
return QVariant("\n".join(authors))
elif col == 2:
size = self.db[self.map[row]].size
return QVariant(BooksView.human_readable(size))

View File

@ -1,3 +1,4 @@
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import os, sys, textwrap, collections, traceback, time
@ -43,7 +44,6 @@ from calibre.gui2.dialogs.search import SearchDialog
from calibre.gui2.dialogs.choose_format import ChooseFormatDialog
from calibre.gui2.dialogs.book_info import BookInfo
from calibre.ebooks.metadata.meta import set_metadata
from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks import BOOK_EXTENSIONS
from calibre.library.database2 import LibraryDatabase2, CoverCache
from calibre.parallel import JobKilled
@ -398,7 +398,7 @@ class Main(MainWindow, Ui_MainWindow):
def change_output_format(self, x):
of = unicode(x).strip()
if of != prefs['output_format']:
if of not in ('LRF', 'EPUB'):
if of not in ('LRF', 'EPUB', 'MOBI'):
warning_dialog(self, 'Warning',
'<p>%s support is still in beta. If you find bugs, please report them by opening a <a href="http://calibre.kovidgoyal.net">ticket</a>.'%of).exec_()
prefs.set('output_format', of)
@ -910,12 +910,13 @@ class Main(MainWindow, Ui_MainWindow):
if not self.device_manager or not rows or len(rows) == 0:
return
ids = iter(self.library_view.model().id(r) for r in rows)
metadata = self.library_view.model().get_metadata(rows)
metadata, full_metadata = self.library_view.model().get_metadata(
rows, full_metadata=True)
for mi in metadata:
cdata = mi['cover']
if cdata:
mi['cover'] = self.cover_to_thumbnail(cdata)
metadata = iter(metadata)
metadata, full_metadata = iter(metadata), iter(full_metadata)
_files = self.library_view.model().get_preferred_formats(rows,
self.device_manager.device_class.FORMATS,
paths=True, set_metadata=True,
@ -923,22 +924,15 @@ class Main(MainWindow, Ui_MainWindow):
files = [getattr(f, 'name', None) for f in _files]
bad, good, gf, names, remove_ids = [], [], [], [], []
for f in files:
mi = metadata.next()
mi, smi = metadata.next(), full_metadata.next()
id = ids.next()
if f is None:
bad.append(mi['title'])
else:
remove_ids.append(id)
aus = mi['authors'].split(',')
aus2 = []
for a in aus:
aus2.extend(a.split('&'))
try:
smi = MetaInformation(mi['title'], aus2)
smi.comments = mi.get('comments', None)
_f = open(f, 'r+b')
set_metadata(_f, smi, f.rpartition('.')[2])
_f.close()
with open(f, 'r+b') as _f:
set_metadata(_f, smi, f.rpartition('.')[2])
except:
print 'Error setting metadata in book:', mi['title']
traceback.print_exc()

View File

@ -9,7 +9,7 @@ from PyQt4.Qt import QMovie, QApplication, Qt, QIcon, QTimer, QWidget, SIGNAL, \
QDesktopServices, QDoubleSpinBox, QLabel, QTextBrowser, \
QPainter, QBrush, QColor, QStandardItemModel, QPalette, \
QStandardItem, QUrl, QRegExpValidator, QRegExp, QLineEdit, \
QToolButton, QMenu, QInputDialog
QToolButton, QMenu, QInputDialog, QAction
from calibre.gui2.viewer.main_ui import Ui_EbookViewer
from calibre.gui2.main_window import MainWindow
@ -221,8 +221,14 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
self.view.set_manager(self)
self.pi = ProgressIndicator(self)
self.toc.setVisible(False)
self.action_quit = QAction(self)
self.addAction(self.action_quit)
self.action_quit.setShortcut(Qt.CTRL+Qt.Key_Q)
self.connect(self.action_quit, SIGNAL('triggered(bool)'),
lambda x:QApplication.instance().quit())
self.action_copy.setDisabled(True)
self.action_metadata.setCheckable(True)
self.action_metadata.setShortcut(Qt.CTRL+Qt.Key_I)
self.action_table_of_contents.setCheckable(True)
self.action_reference_mode.setCheckable(True)
self.connect(self.action_reference_mode, SIGNAL('triggered(bool)'),

View File

@ -174,6 +174,9 @@ def do_list(db, fields, sort_by, ascending, search_text, line_width, separator,
return template.generate(data=data).render('xml')
elif output_format == 'stanza':
data = [i for i in data if i.has_key('fmt_epub')]
for x in data:
if isinstance(x['fmt_epub'], unicode):
x['fmt_epub'] = x['fmt_epub'].encode('utf-8')
template = MarkupTemplate(STANZA_TEMPLATE)
return template.generate(id="urn:calibre:main", data=data, subtitle=subtitle,
sep=os.sep, quote=quote, updated=db.last_modified()).render('xml')

View File

@ -916,12 +916,18 @@ class LibraryDatabase2(LibraryDatabase):
else:
aid = self.conn.execute('INSERT INTO authors(name) VALUES (?)', (a,)).lastrowid
try:
self.conn.execute('INSERT INTO books_authors_link(book, author) VALUES (?,?)', (id, aid))
self.conn.execute('INSERT INTO books_authors_link(book, author) VALUES (?,?)',
(id, aid))
except IntegrityError: # Sometimes books specify the same author twice in their metadata
pass
ss = authors_to_sort_string(authors)
self.conn.execute('UPDATE books SET author_sort=? WHERE id=?',
(ss, id))
self.conn.commit()
self.data.set(id, FIELD_MAP['authors'], ','.join([a.replace(',', '|') for a in authors]), row_is_id=True)
self.data.set(id, FIELD_MAP['author_sort'], self.data[self.data.row(id)][FIELD_MAP['authors']], row_is_id=True)
self.data.set(id, FIELD_MAP['authors'],
','.join([a.replace(',', '|') for a in authors]),
row_is_id=True)
self.data.set(id, FIELD_MAP['author_sort'], ss, row_is_id=True)
self.set_path(id, True)
if notify:
self.notify('metadata', [id])
@ -1147,8 +1153,8 @@ class LibraryDatabase2(LibraryDatabase):
path = pt.name
else:
path = path_or_stream
return run_plugins_on_import(path, format)
return run_plugins_on_import(path, format)
def add_books(self, paths, formats, metadata, uris=[], add_duplicates=True):
'''
Add a book to the database. The result cache is not updated.

View File

@ -103,7 +103,7 @@ Device Integration
What devices does |app| support?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
At the moment |app| has full support for the SONY PRS 500/505/700, Cybook Gen 3, Amazon Kindle as well as the iPhone. In addition, using the :guilabel:`Save to disk` function you can use it with any ebook reader that exports itself as a USB disk.
At the moment |app| has full support for the SONY PRS 500/505/700, Cybook Gen 3, Amazon Kindle 1 and 2 as well as the iPhone. In addition, using the :guilabel:`Save to disk` function you can use it with any ebook reader that exports itself as a USB disk.
I used |app| to transfer some books to my reader, and now the SONY software hangs every time I connect the reader?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -31,7 +31,7 @@ recipe_modules = ['recipe_' + r for r in (
'pobjeda', 'chicago_breaking_news', 'glasgow_herald', 'linuxdevices',
'hindu', 'cincinnati_enquirer', 'physics_world', 'pressonline',
'la_republica', 'physics_today', 'chicago_tribune', 'e_novine',
'al_jazeera', 'winsupersite',
'al_jazeera', 'winsupersite', 'borba',
)]
import re, imp, inspect, time, os

View File

@ -0,0 +1,92 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
borba.rs
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class Borba(BasicNewsRecipe):
    """Recipe for borba.rs that collects article links from section pages."""

    title = 'Borba Online'
    __author__ = 'Darko Miletic'
    description = 'Dnevne novine Borba Online'
    publisher = 'IP Novine Borba'
    category = 'news, politics, Serbia'
    language = _('Serbian')
    oldest_article = 1
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'utf8'
    remove_javascript = True
    use_embedded_content = False
    cover_url = 'http://www.borba.rs/images/stories/novine/naslovna_v.jpg'
    INDEX = u'http://www.borba.rs/'
    extra_css = '@font-face {font-family: "serif0";src:url(res:///Data/FONT/serif0.ttf)} @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif0, serif1, serif} .article_description{font-family: serif0, serif1, serif}'

    # Conversion options reuse the metadata declared above.
    html2lrf_options = [
        '--comment', description,
        '--category', category,
        '--publisher', publisher,
        '--ignore-tables',
    ]

    html2epub_options = (
        'publisher="' + publisher
        + '"\ncomments="' + description
        + '"\ntags="' + category
        + '"\nlinearize_tables=True'
    )

    # (pattern, replacement) pair mapping U+0110 to U+00D0.
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    keep_only_tags = [dict(name='div', attrs={'class':'main'})]

    remove_tags_after = dict(name='div', attrs={'id':'written_comments_title'})

    remove_tags = [
        dict(name=['object','link','iframe','base','img']),
        dict(name='div', attrs={'id':'written_comments_title'}),
    ]

    # (section title, section page URL) pairs consumed by parse_index().
    feeds = [
        (u'Najnovije vesti', u'http://www.borba.rs/content/blogsection/28/105/'),
        (u'Prvi plan', u'http://www.borba.rs/content/blogsection/4/92/'),
        (u'Dogadjaji', u'http://www.borba.rs/content/blogsection/21/83/'),
        (u'Ekonomija', u'http://www.borba.rs/content/blogsection/5/35/'),
        (u'Komentari', u'http://www.borba.rs/content/blogsection/23/94/'),
        (u'Svet', u'http://www.borba.rs/content/blogsection/7/36/'),
        (u'Sport', u'http://www.borba.rs/content/blogsection/6/37/'),
        (u'Fama', u'http://www.borba.rs/content/blogsection/25/89/'),
        (u'B2 Dodatak', u'http://www.borba.rs/content/blogsection/30/116/'),
    ]

    def preprocess_html(self, soup):
        """Tag the document language and strip ``style``/``font`` attributes.

        Returns the same ``soup`` object after modifying it in place.
        """
        for lang_attr in ('xml:lang', 'lang'):
            soup.html[lang_attr] = 'sr-Latn-ME'
        soup.head.insert(0, '<meta http-equiv="Content-Language" content="sr-Latn-ME"/>')
        for attr in ('style', 'font'):
            for tag in soup.findAll(**{attr: True}):
                del tag[attr]
        return soup

    def parse_index(self):
        """Build the list of ``(feed title, articles)`` tuples.

        Each feed URL is fetched as a page and every ``<a>`` element with
        class ``contentpagetitle`` becomes one article dictionary.
        """
        sections = []
        for feedtitle, feedurl in self.get_feeds():
            self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle or feedurl))
            page = self.index_to_soup(feedurl)
            articles = []
            for link in page.findAll('a', attrs={'class':'contentpagetitle'}):
                href = link['href']
                articles.append(dict(
                    title=self.tag_to_string(link),
                    date='',
                    url=href,
                    description='',
                ))
            sections.append((feedtitle, articles))
        return sections

View File

@ -17,9 +17,19 @@ class NewYorkReviewOfBooks(BasicNewsRecipe):
description = u'Book reviews'
language = _('English')
__author__ = 'Kovid Goyal'
needs_subscription = True
remove_tags_before = {'id':'container'}
remove_tags = [{'class':['noprint', 'ad', 'footer']}, {'id':'right-content'}]
def get_browser(self):
    """Return a browser, submitting the site login form when possible.

    The login is attempted only if both ``self.username`` and
    ``self.password`` are not None; otherwise the browser is returned
    as created.
    """
    browser = BasicNewsRecipe.get_browser()
    if self.username is None or self.password is None:
        return browser
    browser.open('http://www.nybooks.com/register/')
    browser.select_form(name='login')
    browser['email'] = self.username
    browser['password'] = self.password
    browser.submit()
    return browser
def parse_index(self):
root = html.fromstring(self.browser.open('http://www.nybooks.com/current-issue').read())
@ -42,10 +52,4 @@ class NewYorkReviewOfBooks(BasicNewsRecipe):
articles.append(article)
return [('Current Issue', articles)]

View File

@ -11,7 +11,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class NewYorker(BasicNewsRecipe):
title = u'The New Yorker'
__author__ = 'Darko Miletic'
description = 'Best of the US journalism'
description = 'The best of US journalism'
oldest_article = 7
language = _('English')
max_articles_per_feed = 100