Sync to trunk

This commit is contained in:
John Schember 2009-02-23 18:51:01 -05:00
commit 9d6fd803b4
56 changed files with 6862 additions and 5988 deletions

View File

@ -2,7 +2,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = 'calibre'
__version__ = '0.4.138'
__version__ = '0.4.139'
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
'''
Various run time constants.

View File

@ -233,7 +233,7 @@ class RTFMetadataWriter(MetadataWriterPlugin):
class MOBIMetadataWriter(MetadataWriterPlugin):
name = 'Set MOBI metadata'
file_types = set(['mobi', 'prc'])
file_types = set(['mobi', 'prc', 'azw'])
description = _('Set metadata in %s files')%'MOBI'
author = 'Marshall T. Vandegrift'

View File

@ -33,9 +33,8 @@ class CYBOOKG3(USBMS):
EBOOK_DIR_MAIN = "eBooks"
EBOOK_DIR_CARD = "eBooks"
SUPPORTS_SUB_DIRS = True
THUMBNAIL_HEIGHT = 144
SUPPORTS_SUB_DIRS = True
def upload_books(self, files, names, on_card=False, end_session=True,
metadata=None):

View File

@ -150,10 +150,13 @@ class Device(object):
the device.
@param locations: Result of a call to L{upload_books}
@param metadata: List of dictionaries. Each dictionary must have the
keys C{title}, C{authors}, C{cover}, C{tags}. The value of the C{cover}
keys C{title}, C{authors}, C{author_sort}, C{cover}, C{tags}.
The value of the C{cover}
element can be None or a three element tuple (width, height, data)
where data is the image data in JPEG format as a string. C{tags} must be
a possibly empty list of strings. C{authors} must be a string.
C{author_sort} may be None. It is up to the driver to decide whether to
use C{author_sort} or not.
The dictionary can also have an optional key "tag order" which should be
another dictionary that maps tag names to lists of book ids. The ids are
ids from the book database.

View File

@ -10,7 +10,7 @@ from calibre.devices.usbms.driver import USBMS
class KINDLE(USBMS):
# Ordered list of supported formats
FORMATS = ['azw', 'mobi', 'prc', 'txt']
FORMATS = ['azw', 'mobi', 'prc', 'azw1', 'tpz', 'txt']
VENDOR_ID = [0x1949]
PRODUCT_ID = [0x0001]

View File

@ -55,7 +55,7 @@ class Book(object):
title = book_metadata_field("title")
authors = book_metadata_field("author", \
formatter=lambda x: x if x and x.strip() else "Unknown")
formatter=lambda x: x if x and x.strip() else _('Unknown'))
mime = book_metadata_field("mime")
rpath = book_metadata_field("path")
id = book_metadata_field("id", formatter=int)
@ -193,7 +193,7 @@ class BookList(_BookList):
attrs = {
"title" : info["title"],
'titleSorter' : sortable_title(info['title']),
"author" : info["authors"] if info['authors'] else 'Unknown', \
"author" : info["authors"] if info['authors'] else _('Unknown'),
"page":"0", "part":"0", "scale":"0", \
"sourceid":sourceid, "id":str(cid), "date":"", \
"mime":mime, "path":name, "size":str(size)

View File

@ -205,9 +205,8 @@ class HTMLProcessor(Processor, Rationalizer):
def save(self):
for meta in list(self.root.xpath('//meta')):
meta.getparent().remove(meta)
#for img in self.root.xpath('//img[@src]'):
# self.convert_image(img)
Processor.save(self)
# Strip all comments since Adobe DE is petrified of them
Processor.save(self, strip_comments=True)
def remove_first_image(self):
images = self.root.xpath('//img')

View File

@ -106,6 +106,8 @@ class EbookIterator(object):
family = family.group(1).strip().replace('"', '')
if family not in families:
print 'WARNING: Family aliasing not supported:', block
else:
print 'Loaded embedded font:', repr(family)
def __enter__(self):
self._tdir = TemporaryDirectory('_ebook_iter')

View File

@ -331,9 +331,8 @@ class PreProcessor(object):
# Convert all entities, since lxml doesn't handle them well
(re.compile(r'&(\S+?);'), convert_entities),
# Remove the <![if/endif tags inserted by everybody's darling, MS Word
(re.compile(r'(?i)<{0,1}!\[(end){0,1}if[^>]*>'), lambda match: ''),
# Strip all comments since Adobe DE is petrified of them
(re.compile(r'<!--[^>]*>'), lambda match : ''),
(re.compile(r'</{0,1}!\[(end){0,1}if\]{0,1}>', re.IGNORECASE),
lambda match: ''),
]
# Fix pdftohtml markup
@ -447,7 +446,7 @@ class Parser(PreProcessor, LoggingInterface):
def save_path(self):
return os.path.join(self.tdir, self.htmlfile_map[self.htmlfile.path])
def save(self):
def save(self, strip_comments=False):
'''
Save processed HTML into the content directory.
Should be called after all HTML processing is finished.
@ -458,7 +457,11 @@ class Parser(PreProcessor, LoggingInterface):
svg.set('xmlns', 'http://www.w3.org/2000/svg')
ans = tostring(self.root, pretty_print=self.opts.pretty_print)
ans = re.compile(r'<head>', re.IGNORECASE).sub('<head>\n\t<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />\n', ans[:1000])+ans[1000:]
ans = re.compile(r'<head>', re.IGNORECASE).sub(
'<head>\n\t<meta http-equiv="Content-Type" '
'content="text/html; charset=utf-8" />\n', ans[:1000])+ans[1000:]
if strip_comments:
ans = re.compile(r'<!--.*?-->', re.DOTALL).sub('', ans)
with open(self.save_path(), 'wb') as f:
f.write(ans)
return f.name
@ -594,7 +597,7 @@ class Processor(Parser):
mark = etree.Element('hr', style=page_break_before)
elem.addprevious(mark)
def save(self):
def save(self, strip_comments=False):
style_path = os.path.splitext(os.path.basename(self.save_path()))[0]
for i, sheet in enumerate([self.stylesheet, self.font_css, self.override_css]):
if sheet is not None:
@ -608,7 +611,7 @@ class Processor(Parser):
if isinstance(raw, unicode):
raw = raw.encode('utf-8')
open(path, 'wb').write(raw)
return Parser.save(self)
return Parser.save(self, strip_comments=strip_comments)
def populate_toc(self, toc):
'''

View File

@ -30,6 +30,7 @@ preferred_source_formats = [
'XHTML',
'PRC',
'AZW',
'FB2',
'RTF',
'PDF',
'TXT',

View File

@ -38,6 +38,7 @@ def extract_embedded_content(doc):
open(fname, 'wb').write(data)
def to_html(fb2file, tdir):
fb2file = os.path.abspath(fb2file)
cwd = os.getcwd()
try:
os.chdir(tdir)
@ -52,7 +53,7 @@ def to_html(fb2file, tdir):
result = transform(doc)
open('index.html', 'wb').write(transform.tostring(result))
try:
mi = get_metadata(open(fb2file, 'rb'))
mi = get_metadata(open(fb2file, 'rb'), 'fb2')
except:
mi = MetaInformation(None, None)
if not mi.title:

View File

@ -18,7 +18,7 @@ from calibre.ebooks.chardet import xml_to_unicode
from calibre import relpath
from calibre.constants import __appname__, __version__
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata import MetaInformation, string_to_authors
class Resource(object):
@ -614,7 +614,7 @@ class OPF(object):
def fget(self):
ans = []
for elem in self.authors_path(self.metadata):
ans.extend([x.strip() for x in self.get_text(elem).split(',')])
ans.extend(string_to_authors(self.get_text(elem)))
return ans
def fset(self, val):
@ -624,7 +624,7 @@ class OPF(object):
for author in val:
attrib = {'{%s}role'%self.NAMESPACES['opf']: 'aut'}
elem = self.create_metadata_element('creator', attrib=attrib)
self.set_text(elem, author)
self.set_text(elem, author.strip())
return property(fget=fget, fset=fset)

View File

@ -505,7 +505,7 @@ def get_metadata(stream):
except:
import traceback
traceback.print_exc()
return mi
return mi
def option_parser():

View File

@ -90,19 +90,25 @@ class AddFiles(Add):
def run(self):
self.canceled = False
for c, book in enumerate(self.paths):
if self.pd.canceled:
self.canceled = True
break
format = os.path.splitext(book)[1]
format = format[1:] if format else None
stream = open(book, 'rb')
self.formats.append(format)
self.names.append(os.path.basename(book))
self.get_metadata(c, stream, stream_type=format,
use_libprs_metadata=True)
self.wait_for_condition()
try:
self.canceled = False
for c, book in enumerate(self.paths):
if self.pd.canceled:
self.canceled = True
break
format = os.path.splitext(book)[1]
format = format[1:] if format else None
stream = open(book, 'rb')
self.formats.append(format)
self.names.append(os.path.basename(book))
self.get_metadata(c, stream, stream_type=format,
use_libprs_metadata=True)
self.wait_for_condition()
finally:
self.disconnect(self.get_metadata,
SIGNAL('metadata(PyQt_PyObject, PyQt_PyObject)'),
self.metadata_delivered)
self.get_metadata = None
def process_duplicates(self):
@ -178,33 +184,39 @@ class AddRecursive(Add):
def run(self):
root = os.path.abspath(self.path)
for dirpath in os.walk(root):
if self.is_canceled():
return
self.emit(SIGNAL('update(PyQt_PyObject)'),
_('Searching in')+' '+dirpath[0])
self.books += list(self.db.find_books_in_directory(dirpath[0],
self.single_book_per_directory))
self.books = [formats for formats in self.books if formats]
# Reset progress bar
self.emit(SIGNAL('searching_done()'))
try:
root = os.path.abspath(self.path)
for dirpath in os.walk(root):
if self.is_canceled():
return
self.emit(SIGNAL('update(PyQt_PyObject)'),
_('Searching in')+' '+dirpath[0])
self.books += list(self.db.find_books_in_directory(dirpath[0],
self.single_book_per_directory))
self.books = [formats for formats in self.books if formats]
# Reset progress bar
self.emit(SIGNAL('searching_done()'))
for c, formats in enumerate(self.books):
self.get_metadata.from_formats(c, formats)
self.wait_for_condition()
for c, formats in enumerate(self.books):
self.get_metadata.from_formats(c, formats)
self.wait_for_condition()
# Add books to database
for c, x in enumerate(self.metadata):
mi, formats = x
if self.is_canceled():
break
if self.db.has_book(mi):
self.duplicates.append((mi, formats))
else:
self.db.import_book(mi, formats, notify=False)
self.number_of_books_added += 1
self.emit(SIGNAL('pupdate(PyQt_PyObject)'), c)
# Add books to database
for c, x in enumerate(self.metadata):
mi, formats = x
if self.is_canceled():
break
if self.db.has_book(mi):
self.duplicates.append((mi, formats))
else:
self.db.import_book(mi, formats, notify=False)
self.number_of_books_added += 1
self.emit(SIGNAL('pupdate(PyQt_PyObject)'), c)
finally:
self.disconnect(self.get_metadata,
SIGNAL('metadataf(PyQt_PyObject, PyQt_PyObject)'),
self.metadata_delivered)
self.get_metadata = None
def process_duplicates(self):

View File

@ -180,11 +180,12 @@ class ConfigDialog(QDialog, Ui_Dialog):
self.toolbar_button_size.setCurrentIndex(0 if icons == self.ICON_SIZES[0] else 1 if icons == self.ICON_SIZES[1] else 2)
self.show_toolbar_text.setChecked(config['show_text_in_toolbar'])
for ext in BOOK_EXTENSIONS:
book_exts = sorted(BOOK_EXTENSIONS)
for ext in book_exts:
self.single_format.addItem(ext.upper(), QVariant(ext))
single_format = config['save_to_disk_single_format']
self.single_format.setCurrentIndex(BOOK_EXTENSIONS.index(single_format))
self.single_format.setCurrentIndex(book_exts.index(single_format))
self.cover_browse.setValue(config['cover_flow_queue_length'])
self.systray_notifications.setChecked(not config['disable_tray_notification'])
from calibre.translations.compiled import translations
@ -203,7 +204,7 @@ class ConfigDialog(QDialog, Ui_Dialog):
self.pdf_metadata.setChecked(prefs['read_file_metadata'])
added_html = False
for ext in BOOK_EXTENSIONS:
for ext in book_exts:
ext = ext.lower()
ext = re.sub(r'(x{0,1})htm(l{0,1})', 'html', ext)
if ext == 'lrf' or is_supported('book.'+ext):

Binary file not shown.

After

Width:  |  Height:  |  Size: 365 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 295 B

View File

@ -20,6 +20,7 @@ from calibre.gui2 import NONE, TableView, qstring_to_unicode, config, \
error_dialog
from calibre.utils.search_query_parser import SearchQueryParser
from calibre.ebooks.metadata.meta import set_metadata as _set_metadata
from calibre.ebooks.metadata import string_to_authors
class LibraryDelegate(QItemDelegate):
COLOR = QColor("blue")
@ -364,12 +365,13 @@ class BooksModel(QAbstractTableModel):
return data
def get_metadata(self, rows, rows_are_ids=False):
metadata = []
def get_metadata(self, rows, rows_are_ids=False, full_metadata=False):
metadata, _full_metadata = [], []
if not rows_are_ids:
rows = [self.db.id(row.row()) for row in rows]
for id in rows:
mi = self.db.get_metadata(id, index_is_id=True)
_full_metadata.append(mi)
au = authors_to_string(mi.authors if mi.authors else [_('Unknown')])
tags = mi.tags if mi.tags else []
if mi.series is not None:
@ -377,6 +379,7 @@ class BooksModel(QAbstractTableModel):
info = {
'title' : mi.title,
'authors' : au,
'author_sort' : mi.author_sort,
'cover' : self.db.cover(id, index_is_id=True),
'tags' : tags,
'comments': mi.comments,
@ -387,7 +390,10 @@ class BooksModel(QAbstractTableModel):
}
metadata.append(info)
return metadata
if full_metadata:
return metadata, _full_metadata
else:
return metadata
def get_preferred_formats_from_ids(self, ids, all_formats, mode='r+b'):
ans = []
@ -928,12 +934,8 @@ class DeviceBooksModel(BooksModel):
au = self.unknown
if role == Qt.EditRole:
return QVariant(au)
au = au.split(',')
authors = []
for i in au:
authors += i.strip().split('&')
jau = [ a.strip() for a in authors ]
return QVariant("\n".join(jau))
authors = string_to_authors(au)
return QVariant("\n".join(authors))
elif col == 2:
size = self.db[self.map[row]].size
return QVariant(BooksView.human_readable(size))

View File

@ -1,3 +1,4 @@
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import os, sys, textwrap, collections, traceback, time
@ -910,12 +911,13 @@ class Main(MainWindow, Ui_MainWindow):
if not self.device_manager or not rows or len(rows) == 0:
return
ids = iter(self.library_view.model().id(r) for r in rows)
metadata = self.library_view.model().get_metadata(rows)
metadata, full_metadata = self.library_view.model().get_metadata(
rows, full_metadata=True)
for mi in metadata:
cdata = mi['cover']
if cdata:
mi['cover'] = self.cover_to_thumbnail(cdata)
metadata = iter(metadata)
metadata, full_metadata = iter(metadata), iter(full_metadata)
_files = self.library_view.model().get_preferred_formats(rows,
self.device_manager.device_class.FORMATS,
paths=True, set_metadata=True,
@ -923,22 +925,15 @@ class Main(MainWindow, Ui_MainWindow):
files = [getattr(f, 'name', None) for f in _files]
bad, good, gf, names, remove_ids = [], [], [], [], []
for f in files:
mi = metadata.next()
mi, smi = metadata.next(), full_metadata.next()
id = ids.next()
if f is None:
bad.append(mi['title'])
else:
remove_ids.append(id)
aus = mi['authors'].split(',')
aus2 = []
for a in aus:
aus2.extend(a.split('&'))
try:
smi = MetaInformation(mi['title'], aus2)
smi.comments = mi.get('comments', None)
_f = open(f, 'r+b')
set_metadata(_f, smi, f.rpartition('.')[2])
_f.close()
with open(f, 'r+b') as _f:
set_metadata(_f, smi, f.rpartition('.')[2])
except:
print 'Error setting metadata in book:', mi['title']
traceback.print_exc()

View File

@ -174,6 +174,9 @@ def do_list(db, fields, sort_by, ascending, search_text, line_width, separator,
return template.generate(data=data).render('xml')
elif output_format == 'stanza':
data = [i for i in data if i.has_key('fmt_epub')]
for x in data:
if isinstance(x['fmt_epub'], unicode):
x['fmt_epub'] = x['fmt_epub'].encode('utf-8')
template = MarkupTemplate(STANZA_TEMPLATE)
return template.generate(id="urn:calibre:main", data=data, subtitle=subtitle,
sep=os.sep, quote=quote, updated=db.last_modified()).render('xml')

View File

@ -1147,7 +1147,7 @@ class LibraryDatabase2(LibraryDatabase):
path = pt.name
else:
path = path_or_stream
return run_plugins_on_import(path, format)
return run_plugins_on_import(path, format)
def add_books(self, paths, formats, metadata, uris=[], add_duplicates=True):
'''

View File

@ -114,10 +114,13 @@ sudo python -c "import urllib2; exec urllib2.urlopen('http://calibre.kovidgoyal.
wget -O- http://calibre.kovidgoyal.net/downloads/${app}-${version}.tar.gz | tar xvz
cd calibre*
python setup.py build &amp;&amp; sudo python setup.py install
sudo calibre_postinstall
</pre>
Note that if your distribution does not have a
correctly compiled libunrar.so, ${app} will not
support rar files.
support rar files. The calibre_postinstall step
is required for device detection and integration
with your desktop environment.
</p>
</div>
</td>

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -5,10 +5,11 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''
Contains the logic for parsing feeds.
'''
import time, logging, traceback, copy
import time, logging, traceback, copy, re
from datetime import datetime
from calibre.web.feeds.feedparser import parse
from calibre import entity_to_unicode
from lxml import html
class Article(object):
@ -19,6 +20,11 @@ class Article(object):
self.downloaded = False
self.id = id
self.title = title.strip() if title else title
try:
self.title = re.sub(r'&(\S+);',
entity_to_unicode, self.title)
except:
pass
self.url = url
self.summary = summary
if summary and not isinstance(summary, unicode):
@ -38,6 +44,7 @@ class Article(object):
self.utctime = datetime(*self.date[:6])
self.localtime = self.utctime + self.time_offset
def __repr__(self):
return \
(u'''\
@ -92,6 +99,7 @@ class Feed(object):
break
self.parse_article(item)
def populate_from_preparsed_feed(self, title, articles, oldest_article=7,
max_articles_per_feed=100):
self.title = title if title else _('Unknown feed')

View File

@ -30,7 +30,8 @@ recipe_modules = ['recipe_' + r for r in (
'honoluluadvertiser', 'starbulletin', 'exiled', 'indy_star', 'dna',
'pobjeda', 'chicago_breaking_news', 'glasgow_herald', 'linuxdevices',
'hindu', 'cincinnati_enquirer', 'physics_world', 'pressonline',
'la_republica', 'physics_today',
'la_republica', 'physics_today', 'chicago_tribune', 'e_novine',
'al_jazeera', 'winsupersite', 'borba',
)]
import re, imp, inspect, time, os

View File

@ -0,0 +1,50 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
aljazeera.net
'''
from calibre.web.feeds.news import BasicNewsRecipe
class AlJazeera(BasicNewsRecipe):
    '''Fetch the English-language news feed from aljazeera.net.'''

    title                  = 'Al Jazeera in English'
    __author__             = 'Darko Miletic'
    description            = 'News from Middle East'
    publisher              = 'Al Jazeera'
    category               = 'news, politics, middle east'

    # Be gentle with the server: one download at a time, 4s between requests.
    simultaneous_downloads = 1
    delay                  = 4

    oldest_article         = 1
    max_articles_per_feed  = 100
    no_stylesheets         = True
    encoding               = 'iso-8859-1'
    remove_javascript      = True
    use_embedded_content   = False

    html2lrf_options = [
                          '--comment', description
                        , '--category', category
                        , '--publisher', publisher
                        , '--ignore-tables'
                        ]

    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_table=True'

    # Keep only the main content container; drop media embeds and the
    # "most active" sidebar cells.
    keep_only_tags = [dict(name='div', attrs={'id':'ctl00_divContent'})]

    remove_tags = [
                     dict(name=['object','link'])
                    ,dict(name='td', attrs={'class':['MostActiveDescHeader','MostActiveDescBody']})
                  ]

    feeds = [(u'AL JAZEERA ENGLISH (AJE)', u'http://english.aljazeera.net/Services/Rss/?PostingId=2007731105943979989' )]

    def preprocess_html(self, soup):
        # Strip inline presentation attributes so device CSS applies cleanly.
        for attr in ('style', 'face'):
            for tag in soup.findAll(**{attr: True}):
                del tag[attr]
        return soup

View File

@ -0,0 +1,92 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
borba.rs
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class Borba(BasicNewsRecipe):
'''Calibre news recipe for the Serbian daily Borba (borba.rs).'''
title = 'Borba Online'
__author__ = 'Darko Miletic'
description = 'Dnevne novine Borba Online'
publisher = 'IP Novine Borba'
category = 'news, politics, Serbia'
language = _('Serbian')
oldest_article = 1
max_articles_per_feed = 100
no_stylesheets = True
encoding = 'utf8'
remove_javascript = True
use_embedded_content = False
cover_url = 'http://www.borba.rs/images/stories/novine/naslovna_v.jpg'
# Site root; kept for reference by section URLs below.
INDEX = u'http://www.borba.rs/'
# Device font-face declarations (Sony Reader paths) with generic serif fallback.
extra_css = '@font-face {font-family: "serif0";src:url(res:///Data/FONT/serif0.ttf)} @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif0, serif1, serif} .article_description{font-family: serif0, serif1, serif}'
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
, '--ignore-tables'
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
# Maps U+0110 (D with stroke) to U+00D0 (Eth) -- presumably a workaround
# for a glyph missing from reader fonts; confirm before removing.
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
keep_only_tags = [dict(name='div', attrs={'class':'main'})]
remove_tags_after = dict(name='div',attrs={'id':'written_comments_title'})
remove_tags = [
dict(name=['object','link','iframe','base','img'])
,dict(name='div',attrs={'id':'written_comments_title'})
]
# Section listing pages (HTML, not RSS) -- scraped by parse_index below.
feeds = [
(u'Najnovije vesti', u'http://www.borba.rs/content/blogsection/28/105/')
,(u'Prvi plan' , u'http://www.borba.rs/content/blogsection/4/92/' )
,(u'Dogadjaji' , u'http://www.borba.rs/content/blogsection/21/83/' )
,(u'Ekonomija' , u'http://www.borba.rs/content/blogsection/5/35/' )
,(u'Komentari' , u'http://www.borba.rs/content/blogsection/23/94/' )
,(u'Svet' , u'http://www.borba.rs/content/blogsection/7/36/' )
,(u'Sport' , u'http://www.borba.rs/content/blogsection/6/37/' )
,(u'Fama' , u'http://www.borba.rs/content/blogsection/25/89/' )
,(u'B2 Dodatak' , u'http://www.borba.rs/content/blogsection/30/116/')
]
def preprocess_html(self, soup):
'''Tag the document as Serbian (Latin script) and strip inline styling.'''
soup.html['xml:lang'] = 'sr-Latn-ME'
soup.html['lang'] = 'sr-Latn-ME'
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-ME"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
for item in soup.findAll(font=True):
del item['font']
return soup
def parse_index(self):
'''Build the (section, articles) list by scraping each section page,
since the "feeds" above are HTML listings rather than RSS.'''
totalfeeds = []
lfeeds = self.get_feeds()
for feedobj in lfeeds:
feedtitle, feedurl = feedobj
self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
articles = []
soup = self.index_to_soup(feedurl)
# Each article link on a section page carries class 'contentpagetitle'.
for item in soup.findAll('a', attrs={'class':'contentpagetitle'}):
url = item['href']
title = self.tag_to_string(item)
articles.append({
'title' :title
,'date' :''
,'url' :url
,'description':''
})
totalfeeds.append((feedtitle, articles))
return totalfeeds

View File

@ -0,0 +1,82 @@
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import re
from urlparse import urlparse, urlunparse
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ptempfile import PersistentTemporaryFile
from threading import RLock
class ChicagoTribune(BasicNewsRecipe):
    '''
    Download politics, local and business news from the Chicago Tribune.

    Feed links point at pages that are not directly convertible, so the
    recipe is marked obfuscated and get_obfuscated_article() fetches the
    print version (or extracts the asset body) into a temporary file.
    '''
    title                   = 'Chicago Tribune'
    __author__              = 'Kovid Goyal'
    description             = 'Politics, local and business news from Chicago'
    language                = _('English')

    use_embedded_content    = False
    articles_are_obfuscated = True
    remove_tags_before      = dict(name='h1')
    # Serializes get_obfuscated_article(), which may be invoked from
    # multiple download threads. (sic: misspelled attribute name kept
    # unchanged for backward compatibility with any external references.)
    obfuctation_lock        = RLock()

    feeds = [
       ('Latest news', 'http://feeds.chicagotribune.com/chicagotribune/news/'),
       ('Local news', 'http://feeds.chicagotribune.com/chicagotribune/news/local/'),
       ('Nation/world', 'http://feeds.chicagotribune.com/chicagotribune/news/nationworld/'),
       ('Hot topics', 'http://feeds.chicagotribune.com/chicagotribune/hottopics/'),
       ('Most E-mailed stories', 'http://feeds.chicagotribune.com/chicagotribune/email/'),
       ('Opinion', 'http://feeds.chicagotribune.com/chicagotribune/opinion/'),
       ('Off Topic', 'http://feeds.chicagotribune.com/chicagotribune/offtopic/'),
       ('Politics', 'http://feeds.chicagotribune.com/chicagotribune/politics/'),
       ('Special Reports', 'http://feeds.chicagotribune.com/chicagotribune/special/'),
       ('Religion News', 'http://feeds.chicagotribune.com/chicagotribune/religion/'),
       ('Business news', 'http://feeds.chicagotribune.com/chicagotribune/business/'),
       ('Jobs and Careers', 'http://feeds.chicagotribune.com/chicagotribune/career/'),
       ('Local scene', 'http://feeds.chicagotribune.com/chicagohomes/localscene/'),
       ('Phil Rosenthal', 'http://feeds.chicagotribune.com/chicagotribune/rosenthal/'),
       ('Tech Buzz', 'http://feeds.chicagotribune.com/chicagotribune/techbuzz/'),
       ('Your Money', 'http://feeds.chicagotribune.com/chicagotribune/yourmoney/'),
       ('Jon Hilkevitch - Getting around', 'http://feeds.chicagotribune.com/chicagotribune/gettingaround/'),
       ('Jon Yates - What\'s your problem?', 'http://feeds.chicagotribune.com/chicagotribune/problem/'),
       # Fixed typo: was 'Garisson Keillor'.
       ('Garrison Keillor', 'http://feeds.chicagotribune.com/chicagotribune/keillor/'),
       ('Marks Jarvis - On Money', 'http://feeds.chicagotribune.com/chicagotribune/marksjarvisonmoney/'),
       ('Sports', 'http://feeds.chicagotribune.com/chicagotribune/sports/'),
       ('Arts and Architecture', 'http://feeds.chicagotribune.com/chicagotribune/arts/'),
       ('Books', 'http://feeds.chicagotribune.com/chicagotribune/books/'),
       ('Magazine', 'http://feeds.chicagotribune.com/chicagotribune/magazine/'),
       ('Movies', 'http://feeds.chicagotribune.com/chicagotribune/movies/'),
       # NOTE(review): 'Music' reuses the movies feed URL -- looks like a
       # copy/paste slip; confirm the correct music feed before changing.
       ('Music', 'http://feeds.chicagotribune.com/chicagotribune/movies/'),
       ('TV', 'http://feeds.chicagotribune.com/chicagotribune/tv/'),
       ('Hypertext', 'http://feeds.chicagotribune.com/chicagotribune/hypertext/'),
       ('iPhone Blog', 'http://feeds.feedburner.com/redeye/iphoneblog'),
       ('Julie\'s Health Club', 'http://feeds.chicagotribune.com/chicagotribune_julieshealthclub/'),
    ]

    # Persistent temp files holding de-obfuscated article HTML; kept in a
    # list so they stay alive for the lifetime of the recipe object.
    temp_files = []

    def get_article_url(self, article):
        # Prefer the publisher's original link over the feedburner redirect,
        # falling back to guid and then the plain link.
        return article.get('feedburner_origlink', article.get('guid', article.get('link')))

    def get_obfuscated_article(self, url, logger):
        '''Fetch `url`, locate a clean printable copy of the article, write it
        to a persistent temp file and return that file's path.'''
        with self.obfuctation_lock:
            soup = self.index_to_soup(url)
            img = soup.find('img', alt='Print')
            if img is not None:
                # Follow the "Print" link for a single-page, chrome-free copy.
                a = img.parent.find('a', href=True)
                purl = urlparse(url)
                xurl = urlunparse(purl[:2] + (a['href'], '', '', ''))
                soup = self.index_to_soup(xurl)
                # Absolutize site-relative image URLs so they still resolve.
                for img in soup.findAll('img', src=True):
                    if img['src'].startswith('/'):
                        img['src'] = urlunparse(purl[:2]+(img['src'], '', '', ''))
                html = unicode(soup)
            else:
                # No print version: stitch the title and article body together.
                h1 = soup.find(id='page-title')
                body = soup.find(attrs={'class':re.compile('asset-content')})
                html = u'<html><head/><body>%s</body></html>'%(unicode(h1)+unicode(body))
            self.temp_files.append(PersistentTemporaryFile('_chicago_tribune.xhtml'))
            self.temp_files[-1].write(html.encode('utf-8'))
            self.temp_files[-1].close()
            return self.temp_files[-1].name

View File

@ -0,0 +1,58 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
e-novine.com
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class E_novine(BasicNewsRecipe):
'''Calibre news recipe for e-novine.com (Serbian news portal).'''
title = 'E-Novine'
__author__ = 'Darko Miletic'
description = 'News from Serbia'
publisher = 'E-novine'
category = 'news, politics, Balcans'
oldest_article = 1
max_articles_per_feed = 100
no_stylesheets = True
encoding = 'cp1250'
cover_url = 'http://www.e-novine.com/slike/slike_3/r1/g2008/m03/y3165525326702598.jpg'
remove_javascript = True
use_embedded_content = False
language = _('Serbian')
# Device font-face declarations (Sony Reader paths) with generic fallbacks.
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"'
# Maps U+0110 (D with stroke) to U+00D0 (Eth) -- presumably a workaround
# for a glyph missing from reader fonts; confirm before removing.
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
keep_only_tags = [dict(name='div', attrs={'id':['css_47_0_2844H']})]
remove_tags = [dict(name=['object','link','embed','iframe'])]
feeds = [(u'Sve vesti', u'http://www.e-novine.com/rss/e-novine.xml' )]
def preprocess_html(self, soup):
'''Tag the document as Serbian (Latin script), strip inline styles and
reorder the content container's leading divs.'''
soup.html['xml:lang'] = 'sr-Latn-ME'
soup.html['lang'] = 'sr-Latn-ME'
mtag = '<meta http-equiv="Content-Language" content="sr-Latn-ME"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(style=True):
del item['style']
ftag = soup.find('div', attrs={'id':'css_47_0_2844H'})
if ftag:
# Pull out the first child div, drop the (new) first div, then put the
# extracted one back in front -- NOTE(review): presumably this removes
# a navigation block while keeping the article lead; confirm on a live
# page before touching this ordering.
it = ftag.div
it.extract()
ftag.div.extract()
ftag.insert(0,it)
return soup

View File

@ -19,7 +19,7 @@ class Infobae(BasicNewsRecipe):
no_stylesheets = True
use_embedded_content = False
language = _('Spanish')
encoding = 'iso-8859-1'
encoding = 'cp1252'
cover_url = 'http://www.infobae.com/imgs/header/header.gif'
remove_javascript = True
@ -28,6 +28,7 @@ class Infobae(BasicNewsRecipe):
, '--category' , category
, '--publisher', publisher
, '--ignore-tables'
, '--ignore-colors'
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'

View File

@ -21,14 +21,16 @@ class LaSegunda(BasicNewsRecipe):
encoding = 'cp1252'
cover_url = 'http://www.lasegunda.com/imagenes/logotipo_lasegunda_Oli.gif'
remove_javascript = True
language = _('Spanish')
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
, '--ignore-tables'
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} "'
keep_only_tags = [dict(name='table')]
@ -52,10 +54,7 @@ class LaSegunda(BasicNewsRecipe):
def preprocess_html(self, soup):
mtag = '<meta http-equiv="Content-Language" content="es-CL"/>'
soup.head.insert(0,mtag)
for item in soup.findAll(name='table', width=True):
del item['width']
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Spanish')

View File

@ -7,11 +7,10 @@ pagina12.com.ar
'''
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
class Pagina12(BasicNewsRecipe):
title = u'Pagina/12'
title = 'Pagina/12'
__author__ = 'Darko Miletic'
description = 'Noticias de Argentina y el resto del mundo'
publisher = 'La Pagina S.A.'
@ -20,9 +19,11 @@ class Pagina12(BasicNewsRecipe):
max_articles_per_feed = 100
no_stylesheets = True
encoding = 'cp1252'
cover_url = strftime('http://www.pagina12.com.ar/fotos/%Y%m%d/diario/TAPAN.jpg')
cover_url = strftime('http://www.pagina12.com.ar/fotos/%Y%m%d/diario/tapagn.jpg')
remove_javascript = True
use_embedded_content = False
language = _('Spanish')
html2lrf_options = [
'--comment', description
@ -50,5 +51,3 @@ class Pagina12(BasicNewsRecipe):
for item in soup.findAll(style=True):
del item['style']
return soup
language = _('Spanish')

View File

@ -0,0 +1,28 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
class Winsupersite(BasicNewsRecipe):
'''Calibre news recipe for Paul Thurrott's SuperSite for Windows.'''
title = u'Supersite for Windows'
description = u'Paul Thurrott SuperSite for Windows'
publisher = 'Paul Thurrott'
__author__ = 'Hypernova'
language = _('English')
oldest_article = 30
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
remove_javascript = True
html2lrf_options = ['--ignore-tables']
html2epub_options = 'linearize_tables = True'
remove_tags_before = dict(name='h1')
# Drop everything from the author's sign-off ("--Paul Thurrott") to the
# end of the body.
preprocess_regexps = [
(re.compile(r'<p>--Paul Thurrott.*</body>', re.DOTALL|re.IGNORECASE),
lambda match: '</body>'),
]
def get_browser(self):
'''Return a browser that has visited the site's front page first --
presumably to pick up session cookies; confirm against the site.'''
br = BasicNewsRecipe.get_browser()
br.open('http://www.winsupersite.com')
return br
feeds = [(u'Supersite for Windows', u'http://www.winsupersite.com/supersite.xml')]

View File

@ -284,7 +284,13 @@ class gui(OptionlessCommand):
manifest = '<RCC>\n<qresource prefix="/">\n%s\n</qresource>\n</RCC>'%'\n'.join(files)
with open('images.qrc', 'wb') as f:
f.write(manifest)
check_call(['pyrcc4', '-o', images, 'images.qrc'])
try:
check_call(['pyrcc4', '-o', images, 'images.qrc'])
except:
import traceback
traceback.print_exc()
raise Exception('You do not have pyrcc4 in your PATH. '
'Install the PyQt4 development tools.')
else:
print 'Images are up to date'
finally: