Pull from trunk

Kovid Goyal 2009-02-21 20:42:54 -08:00
commit 1d6a6586a9
20 changed files with 278 additions and 38 deletions

View File

@@ -2,7 +2,7 @@ __license__ = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'
 __appname__ = 'calibre'
-__version__ = '0.4.138'
+__version__ = '0.4.139'
 __author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
 '''
 Various run time constants.

View File

@@ -233,7 +233,7 @@ class RTFMetadataWriter(MetadataWriterPlugin):
 class MOBIMetadataWriter(MetadataWriterPlugin):
     name = 'Set MOBI metadata'
-    file_types = set(['mobi', 'prc'])
+    file_types = set(['mobi', 'prc', 'azw'])
     description = _('Set metadata in %s files')%'MOBI'
     author = 'Marshall T. Vandegrift'
@@ -246,4 +246,4 @@ plugins = [HTML2ZIP]
 plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
     x.__name__.endswith('MetadataReader')]
 plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
     x.__name__.endswith('MetadataWriter')]

View File

@@ -33,6 +33,7 @@ class CYBOOKG3(USBMS):
     EBOOK_DIR_MAIN = "eBooks"
     EBOOK_DIR_CARD = "eBooks"
+    THUMBNAIL_HEIGHT = 144
     SUPPORTS_SUB_DIRS = True

     def upload_books(self, files, names, on_card=False, end_session=True,

View File

@@ -30,7 +30,7 @@ def write_t2b(t2bfile, coverdata=None):
     if coverdata != None:
         coverdata = StringIO.StringIO(coverdata)
         cover = Image.open(coverdata).convert("L")
-        cover.thumbnail((96, 144))
+        cover.thumbnail((96, 144), Image.ANTIALIAS)
        t2bcover = Image.new('L', (96, 144), 'white')
        x, y = cover.size
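PIL's `thumbnail` resamples with the coarse `NEAREST` filter unless told otherwise, so passing `Image.ANTIALIAS` noticeably improves the 96x144 cover thumbnails. A minimal sketch of the difference, assuming the classic PIL API that calibre 0.4.x used:

    import Image  # classic PIL; Pillow spells this `from PIL import Image`

    cover = Image.open('cover.jpg').convert('L')  # 8-bit grayscale, as in write_t2b
    # Without a filter argument, thumbnail() resamples with NEAREST,
    # which leaves visibly jagged edges at 96x144.
    cover.thumbnail((96, 144), Image.ANTIALIAS)   # high-quality downscale
    cover.save('thumb.png')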

View File

@@ -205,9 +205,8 @@ class HTMLProcessor(Processor, Rationalizer):
     def save(self):
         for meta in list(self.root.xpath('//meta')):
             meta.getparent().remove(meta)
-        #for img in self.root.xpath('//img[@src]'):
-        #    self.convert_image(img)
-        Processor.save(self)
+        # Strip all comments since Adobe DE is petrified of them
+        Processor.save(self, strip_comments=True)

     def remove_first_image(self):
         images = self.root.xpath('//img')

View File

@@ -331,9 +331,8 @@ class PreProcessor(object):
         # Convert all entities, since lxml doesn't handle them well
         (re.compile(r'&(\S+?);'), convert_entities),
         # Remove the <![if/endif tags inserted by everybody's darling, MS Word
-        (re.compile(r'(?i)<{0,1}!\[(end){0,1}if[^>]*>'), lambda match: ''),
-        # Strip all comments since Adobe DE is petrified of them
-        (re.compile(r'<!--[^>]*>'), lambda match : ''),
+        (re.compile(r'</{0,1}!\[(end){0,1}if\]{0,1}>', re.IGNORECASE),
+         lambda match: ''),
     ]

     # Fix pdftohtml markup
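These preprocess rules are (compiled pattern, replacement callable) pairs run over the raw HTML before parsing; the comment-stripping rule disappears from this list because it moves into `Parser.save()` below. A sketch of how such a rule list is applied (the driver loop is illustrative, not calibre's exact code):

    import re

    # One rule from the list above: drop bare MS Word conditional tags.
    PREPROCESS_REGEXPS = [
        (re.compile(r'</{0,1}!\[(end){0,1}if\]{0,1}>', re.IGNORECASE),
         lambda match: ''),
    ]

    def preprocess(html):
        for pattern, replacement in PREPROCESS_REGEXPS:
            html = pattern.sub(replacement, html)
        return html

    print preprocess('before<![endif]>after')  # -> beforeafter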
@@ -447,7 +446,7 @@ class Parser(PreProcessor, LoggingInterface):
     def save_path(self):
         return os.path.join(self.tdir, self.htmlfile_map[self.htmlfile.path])

-    def save(self):
+    def save(self, strip_comments=False):
         '''
         Save processed HTML into the content directory.
         Should be called after all HTML processing is finished.
@@ -458,7 +457,11 @@ class Parser(PreProcessor, LoggingInterface):
             svg.set('xmlns', 'http://www.w3.org/2000/svg')
         ans = tostring(self.root, pretty_print=self.opts.pretty_print)
-        ans = re.compile(r'<head>', re.IGNORECASE).sub('<head>\n\t<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />\n', ans[:1000])+ans[1000:]
+        ans = re.compile(r'<head>', re.IGNORECASE).sub(
+            '<head>\n\t<meta http-equiv="Content-Type" '
+            'content="text/html; charset=utf-8" />\n', ans[:1000])+ans[1000:]
+        if strip_comments:
+            ans = re.compile(r'<!--.*?-->', re.DOTALL).sub('', ans)
         with open(self.save_path(), 'wb') as f:
             f.write(ans)
             return f.name
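Moving the comment stripping here also fixes it: the old preprocessing pattern `<!--[^>]*>` stops at the first `>`, so any comment containing one was only partially removed, while `<!--.*?-->` with `re.DOTALL` consumes the whole comment even across newlines. A minimal demonstration:

    import re

    html = 'keep<!-- a comment\nwith a > inside -->keep'

    old = re.compile(r'<!--[^>]*>').sub('', html)             # old preprocess rule
    new = re.compile(r'<!--.*?-->', re.DOTALL).sub('', html)  # new rule in save()

    print old  # 'keep inside -->keep'  (comment only partially removed)
    print new  # 'keepkeep'             (comment gone)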
@@ -594,7 +597,7 @@ class Processor(Parser):
             mark = etree.Element('hr', style=page_break_before)
             elem.addprevious(mark)

-    def save(self):
+    def save(self, strip_comments=False):
         style_path = os.path.splitext(os.path.basename(self.save_path()))[0]
         for i, sheet in enumerate([self.stylesheet, self.font_css, self.override_css]):
             if sheet is not None:
@@ -608,7 +611,7 @@ class Processor(Parser):
                 if isinstance(raw, unicode):
                     raw = raw.encode('utf-8')
                 open(path, 'wb').write(raw)
-        return Parser.save(self)
+        return Parser.save(self, strip_comments=strip_comments)

     def populate_toc(self, toc):
         '''

View File

@@ -30,6 +30,7 @@ preferred_source_formats = [
     'XHTML',
     'PRC',
     'AZW',
+    'FB2',
     'RTF',
     'PDF',
     'TXT',

View File

@@ -38,6 +38,7 @@ def extract_embedded_content(doc):
         open(fname, 'wb').write(data)

 def to_html(fb2file, tdir):
+    fb2file = os.path.abspath(fb2file)
     cwd = os.getcwd()
     try:
         os.chdir(tdir)
@@ -52,7 +53,7 @@ def to_html(fb2file, tdir):
         result = transform(doc)
         open('index.html', 'wb').write(transform.tostring(result))
         try:
-            mi = get_metadata(open(fb2file, 'rb'))
+            mi = get_metadata(open(fb2file, 'rb'), 'fb2')
         except:
             mi = MetaInformation(None, None)
         if not mi.title:
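The `os.path.abspath` call matters because `to_html` changes into `tdir` before the file is reopened for metadata; a relative `fb2file` would then resolve against the wrong directory. A sketch of the failure mode:

    import os

    def broken(fb2file, tdir):
        os.chdir(tdir)
        return open(fb2file, 'rb')   # a relative path now looks inside tdir: IOError

    def fixed(fb2file, tdir):
        fb2file = os.path.abspath(fb2file)  # pin to the original working directory
        os.chdir(tdir)
        return open(fb2file, 'rb')          # still opens the intended file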

Binary file not shown (new image, 295 B).

View File

@@ -114,10 +114,13 @@ sudo python -c "import urllib2; exec urllib2.urlopen('http://calibre.kovidgoyal.
 wget -O- http://calibre.kovidgoyal.net/downloads/${app}-${version}.tar.gz | tar xvz
 cd calibre*
 python setup.py build &amp;&amp; sudo python setup.py install
+sudo calibre_postinstall
 </pre>
 Note that if your distribution does not have a
 correctly compiled libunrar.so, ${app} will not
-support rar files.
+support rar files. The calibre_postinstall step
+is required for device detection and integration
+with your desktop environment.
 </p>
 </div>
 </td>

View File

@@ -5,10 +5,11 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 '''
 Contains the logic for parsing feeds.
 '''
-import time, logging, traceback, copy
+import time, logging, traceback, copy, re
 from datetime import datetime

 from calibre.web.feeds.feedparser import parse
+from calibre import entity_to_unicode
 from lxml import html

 class Article(object):
@@ -19,6 +20,11 @@ class Article(object):
         self.downloaded = False
         self.id = id
         self.title = title.strip() if title else title
+        try:
+            self.title = re.sub(r'&(\S+);',
+                    entity_to_unicode, self.title)
+        except:
+            pass
         self.url = url
         self.summary = summary
         if summary and not isinstance(summary, unicode):
@@ -37,6 +43,7 @@ class Article(object):
         self.date = published
         self.utctime = datetime(*self.date[:6])
         self.localtime = self.utctime + self.time_offset
+
     def __repr__(self):
         return \
@@ -91,7 +98,8 @@ class Feed(object):
                 if len(self.articles) >= max_articles_per_feed:
                     break
                 self.parse_article(item)
+
     def populate_from_preparsed_feed(self, title, articles, oldest_article=7,
                 max_articles_per_feed=100):
         self.title = title if title else _('Unknown feed')
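The new `re.sub` decodes HTML entities that feeds leave in article titles. `entity_to_unicode` is calibre's own callback; the stand-in below is a hypothetical minimal equivalent, shown only to illustrate the substitution shape:

    import re
    from htmlentitydefs import name2codepoint  # Python 2 stdlib, as in this era

    def entity_to_unicode(match):
        # Hypothetical stand-in for calibre's entity_to_unicode: decode numeric
        # and named entities, pass anything unrecognised through untouched.
        ent = match.group(1)
        if ent.startswith('#'):
            try:
                return unichr(int(ent[1:]))
            except ValueError:
                return match.group()
        return unichr(name2codepoint[ent]) if ent in name2codepoint else match.group()

    title = u'Law &amp; Order &#8212; recap'
    print re.sub(r'&(\S+);', entity_to_unicode, title)  # u'Law & Order \u2014 recap'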

View File

@@ -30,7 +30,8 @@ recipe_modules = ['recipe_' + r for r in (
     'honoluluadvertiser', 'starbulletin', 'exiled', 'indy_star', 'dna',
     'pobjeda', 'chicago_breaking_news', 'glasgow_herald', 'linuxdevices',
     'hindu', 'cincinnati_enquirer', 'physics_world', 'pressonline',
-    'la_republica', 'physics_today',
+    'la_republica', 'physics_today', 'chicago_tribune', 'e_novine',
+    'al_jazeera', 'winsupersite',
     )]

 import re, imp, inspect, time, os

View File

@@ -0,0 +1,50 @@
+#!/usr/bin/env python
+__license__ = 'GPL v3'
+__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+'''
+aljazeera.net
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AlJazeera(BasicNewsRecipe):
+    title = 'Al Jazeera in English'
+    __author__ = 'Darko Miletic'
+    description = 'News from Middle East'
+    publisher = 'Al Jazeera'
+    category = 'news, politics, middle east'
+    simultaneous_downloads = 1
+    delay = 4
+    oldest_article = 1
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    encoding = 'iso-8859-1'
+    remove_javascript = True
+    use_embedded_content = False
+
+    html2lrf_options = [
+          '--comment', description
+        , '--category', category
+        , '--publisher', publisher
+        , '--ignore-tables'
+        ]
+
+    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_table=True'
+
+    keep_only_tags = [dict(name='div', attrs={'id':'ctl00_divContent'})]
+
+    remove_tags = [
+         dict(name=['object','link'])
+        ,dict(name='td', attrs={'class':['MostActiveDescHeader','MostActiveDescBody']})
+        ]
+
+    feeds = [(u'AL JAZEERA ENGLISH (AJE)', u'http://english.aljazeera.net/Services/Rss/?PostingId=2007731105943979989' )]
+
+    def preprocess_html(self, soup):
+        for item in soup.findAll(style=True):
+            del item['style']
+        for item in soup.findAll(face=True):
+            del item['face']
+        return soup

View File

@@ -0,0 +1,82 @@
+from __future__ import with_statement
+
+__license__ = 'GPL 3'
+__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import re
+from urlparse import urlparse, urlunparse
+from threading import RLock
+
+from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ptempfile import PersistentTemporaryFile
+
+class ChicagoTribune(BasicNewsRecipe):
+
+    title = 'Chicago Tribune'
+    __author__ = 'Kovid Goyal'
+    description = 'Politics, local and business news from Chicago'
+    language = _('English')
+    use_embedded_content = False
+    articles_are_obfuscated = True
+    remove_tags_before = dict(name='h1')
+    obfuctation_lock = RLock()
+
+    feeds = [
+        ('Latest news', 'http://feeds.chicagotribune.com/chicagotribune/news/'),
+        ('Local news', 'http://feeds.chicagotribune.com/chicagotribune/news/local/'),
+        ('Nation/world', 'http://feeds.chicagotribune.com/chicagotribune/news/nationworld/'),
+        ('Hot topics', 'http://feeds.chicagotribune.com/chicagotribune/hottopics/'),
+        ('Most E-mailed stories', 'http://feeds.chicagotribune.com/chicagotribune/email/'),
+        ('Opinion', 'http://feeds.chicagotribune.com/chicagotribune/opinion/'),
+        ('Off Topic', 'http://feeds.chicagotribune.com/chicagotribune/offtopic/'),
+        ('Politics', 'http://feeds.chicagotribune.com/chicagotribune/politics/'),
+        ('Special Reports', 'http://feeds.chicagotribune.com/chicagotribune/special/'),
+        ('Religion News', 'http://feeds.chicagotribune.com/chicagotribune/religion/'),
+        ('Business news', 'http://feeds.chicagotribune.com/chicagotribune/business/'),
+        ('Jobs and Careers', 'http://feeds.chicagotribune.com/chicagotribune/career/'),
+        ('Local scene', 'http://feeds.chicagotribune.com/chicagohomes/localscene/'),
+        ('Phil Rosenthal', 'http://feeds.chicagotribune.com/chicagotribune/rosenthal/'),
+        ('Tech Buzz', 'http://feeds.chicagotribune.com/chicagotribune/techbuzz/'),
+        ('Your Money', 'http://feeds.chicagotribune.com/chicagotribune/yourmoney/'),
+        ('Jon Hilkevitch - Getting around', 'http://feeds.chicagotribune.com/chicagotribune/gettingaround/'),
+        ('Jon Yates - What\'s your problem?', 'http://feeds.chicagotribune.com/chicagotribune/problem/'),
+        ('Garisson Keillor', 'http://feeds.chicagotribune.com/chicagotribune/keillor/'),
+        ('Marks Jarvis - On Money', 'http://feeds.chicagotribune.com/chicagotribune/marksjarvisonmoney/'),
+        ('Sports', 'http://feeds.chicagotribune.com/chicagotribune/sports/'),
+        ('Arts and Architecture', 'http://feeds.chicagotribune.com/chicagotribune/arts/'),
+        ('Books', 'http://feeds.chicagotribune.com/chicagotribune/books/'),
+        ('Magazine', 'http://feeds.chicagotribune.com/chicagotribune/magazine/'),
+        ('Movies', 'http://feeds.chicagotribune.com/chicagotribune/movies/'),
+        ('Music', 'http://feeds.chicagotribune.com/chicagotribune/movies/'),
+        ('TV', 'http://feeds.chicagotribune.com/chicagotribune/tv/'),
+        ('Hypertext', 'http://feeds.chicagotribune.com/chicagotribune/hypertext/'),
+        ('iPhone Blog', 'http://feeds.feedburner.com/redeye/iphoneblog'),
+        ('Julie\'s Health Club', 'http://feeds.chicagotribune.com/chicagotribune_julieshealthclub/'),
+        ]
+
+    temp_files = []
+
+    def get_article_url(self, article):
+        return article.get('feedburner_origlink', article.get('guid', article.get('link')))
+
+    def get_obfuscated_article(self, url, logger):
+        with self.obfuctation_lock:
+            soup = self.index_to_soup(url)
+            img = soup.find('img', alt='Print')
+            if img is not None:
+                a = img.parent.find('a', href=True)
+                purl = urlparse(url)
+                xurl = urlunparse(purl[:2] + (a['href'], '', '', ''))
+                soup = self.index_to_soup(xurl)
+                for img in soup.findAll('img', src=True):
+                    if img['src'].startswith('/'):
+                        img['src'] = urlunparse(purl[:2]+(img['src'], '', '', ''))
+                html = unicode(soup)
+            else:
+                h1 = soup.find(id='page-title')
+                body = soup.find(attrs={'class':re.compile('asset-content')})
+                html = u'<html><head/><body>%s</body></html>'%(unicode(h1)+unicode(body))
+            self.temp_files.append(PersistentTemporaryFile('_chicago_tribune.xhtml'))
+            self.temp_files[-1].write(html.encode('utf-8'))
+            self.temp_files[-1].close()
+            return self.temp_files[-1].name
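The recipe finds the article's "Print" link and grafts its site-relative `href` onto the scheme and host of the original URL with `urlunparse`; the same trick later absolutizes image `src` attributes. A sketch of the recombination (URL illustrative):

    from urlparse import urlparse, urlunparse  # Python 2 stdlib

    url = 'http://www.chicagotribune.com/news/story.html'  # illustrative
    purl = urlparse(url)
    # purl[:2] is (scheme, netloc); append a path and empty params/query/fragment.
    print urlunparse(purl[:2] + ('/news/print/story.html', '', '', ''))
    # -> http://www.chicagotribune.com/news/print/story.html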

View File

@@ -0,0 +1,58 @@
+#!/usr/bin/env python
+__license__ = 'GPL v3'
+__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
+'''
+e-novine.com
+'''
+
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class E_novine(BasicNewsRecipe):
+    title = 'E-Novine'
+    __author__ = 'Darko Miletic'
+    description = 'News from Serbia'
+    publisher = 'E-novine'
+    category = 'news, politics, Balcans'
+    oldest_article = 1
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    encoding = 'cp1250'
+    cover_url = 'http://www.e-novine.com/slike/slike_3/r1/g2008/m03/y3165525326702598.jpg'
+    remove_javascript = True
+    use_embedded_content = False
+    language = _('Serbian')
+
+    extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
+
+    html2lrf_options = [
+          '--comment', description
+        , '--category', category
+        , '--publisher', publisher
+        ]
+
+    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"'
+
+    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
+
+    keep_only_tags = [dict(name='div', attrs={'id':['css_47_0_2844H']})]
+
+    remove_tags = [dict(name=['object','link','embed','iframe'])]
+
+    feeds = [(u'Sve vesti', u'http://www.e-novine.com/rss/e-novine.xml' )]
+
+    def preprocess_html(self, soup):
+        soup.html['xml:lang'] = 'sr-Latn-ME'
+        soup.html['lang'] = 'sr-Latn-ME'
+        mtag = '<meta http-equiv="Content-Language" content="sr-Latn-ME"/>'
+        soup.head.insert(0,mtag)
+        for item in soup.findAll(style=True):
+            del item['style']
+        ftag = soup.find('div', attrs={'id':'css_47_0_2844H'})
+        if ftag:
+            it = ftag.div
+            it.extract()
+            ftag.div.extract()
+            ftag.insert(0,it)
+        return soup

View File

@@ -19,7 +19,7 @@ class Infobae(BasicNewsRecipe):
     no_stylesheets = True
     use_embedded_content = False
     language = _('Spanish')
-    encoding = 'iso-8859-1'
+    encoding = 'cp1252'
     cover_url = 'http://www.infobae.com/imgs/header/header.gif'
     remove_javascript = True
@@ -28,9 +28,10 @@ class Infobae(BasicNewsRecipe):
        , '--category' , category
        , '--publisher', publisher
        , '--ignore-tables'
+       , '--ignore-colors'
        ]

    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'

    remove_tags = [
        dict(name=['embed','link','object'])

View File

@@ -6,8 +6,8 @@ __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
 lasegunda.com
 '''
 from calibre.web.feeds.news import BasicNewsRecipe

 class LaSegunda(BasicNewsRecipe):
     title = 'La Segunda'
     __author__ = 'Darko Miletic'
@@ -21,14 +21,16 @@ class LaSegunda(BasicNewsRecipe):
     encoding = 'cp1252'
     cover_url = 'http://www.lasegunda.com/imagenes/logotipo_lasegunda_Oli.gif'
     remove_javascript = True
+    language = _('Spanish')

     html2lrf_options = [
           '--comment', description
         , '--category', category
         , '--publisher', publisher
+        , '--ignore-tables'
         ]

-    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
+    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} "'

     keep_only_tags = [dict(name='table')]
@@ -52,10 +54,7 @@ class LaSegunda(BasicNewsRecipe):
     def preprocess_html(self, soup):
         mtag = '<meta http-equiv="Content-Language" content="es-CL"/>'
         soup.head.insert(0,mtag)
-        for item in soup.findAll(name='table', width=True):
-            del item['width']
         for item in soup.findAll(style=True):
             del item['style']
         return soup
-
-    language = _('Spanish')

View File

@@ -7,11 +7,10 @@ pagina12.com.ar
 '''
 from calibre import strftime
-from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.web.feeds.news import BasicNewsRecipe

 class Pagina12(BasicNewsRecipe):
-    title = u'Pagina/12'
+    title = 'Pagina/12'
     __author__ = 'Darko Miletic'
     description = 'Noticias de Argentina y el resto del mundo'
     publisher = 'La Pagina S.A.'
@@ -20,12 +19,14 @@ class Pagina12(BasicNewsRecipe):
     max_articles_per_feed = 100
     no_stylesheets = True
     encoding = 'cp1252'
-    cover_url = strftime('http://www.pagina12.com.ar/fotos/%Y%m%d/diario/TAPAN.jpg')
+    cover_url = strftime('http://www.pagina12.com.ar/fotos/%Y%m%d/diario/tapagn.jpg')
     remove_javascript = True
     use_embedded_content = False
+    language = _('Spanish')

     html2lrf_options = [
           '--comment', description
         , '--category', category
         , '--publisher', publisher
         ]
@@ -50,5 +51,3 @@ class Pagina12(BasicNewsRecipe):
         for item in soup.findAll(style=True):
             del item['style']
         return soup
-
-    language = _('Spanish')

View File

@@ -0,0 +1,28 @@
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Winsupersite(BasicNewsRecipe):
+    title = u'Supersite for Windows'
+    description = u'Paul Thurrott SuperSite for Windows'
+    publisher = 'Paul Thurrott'
+    __author__ = 'Hypernova'
+    language = _('English')
+    oldest_article = 30
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    use_embedded_content = False
+    remove_javascript = True
+    html2lrf_options = ['--ignore-tables']
+    html2epub_options = 'linearize_tables = True'
+    remove_tags_before = dict(name='h1')
+
+    preprocess_regexps = [
+        (re.compile(r'<p>--Paul Thurrott.*</body>', re.DOTALL|re.IGNORECASE),
+         lambda match: '</body>'),
+        ]
+
+    def get_browser(self):
+        br = BasicNewsRecipe.get_browser()
+        br.open('http://www.winsupersite.com')
+        return br
+
+    feeds = [(u'Supersite for Windows', u'http://www.winsupersite.com/supersite.xml')]

View File

@@ -284,7 +284,13 @@ class gui(OptionlessCommand):
                 manifest = '<RCC>\n<qresource prefix="/">\n%s\n</qresource>\n</RCC>'%'\n'.join(files)
                 with open('images.qrc', 'wb') as f:
                     f.write(manifest)
-                check_call(['pyrcc4', '-o', images, 'images.qrc'])
+                try:
+                    check_call(['pyrcc4', '-o', images, 'images.qrc'])
+                except:
+                    import traceback
+                    traceback.print_exc()
+                    raise Exception('You do not have pyrcc4 in your PATH. '
+                            'Install the PyQt4 development tools.')
             else:
                 print 'Images are up to date'
         finally:
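`check_call` raises `OSError` when `pyrcc4` is not on the PATH and `CalledProcessError` when it runs but exits non-zero; the commit catches everything and re-raises with an actionable message. A variant sketch that distinguishes the two cases (not the commit's code):

    from subprocess import check_call, CalledProcessError

    def compile_resources(images):
        try:
            check_call(['pyrcc4', '-o', images, 'images.qrc'])
        except OSError:
            # the binary itself was not found
            raise Exception('You do not have pyrcc4 in your PATH. '
                            'Install the PyQt4 development tools.')
        except CalledProcessError:
            # pyrcc4 ran and failed; let its error propagate
            raise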
@@ -670,7 +676,7 @@ class stage3(OptionlessCommand):
     def run(self):
         OptionlessCommand.run(self)
         self.misc()

 class stage2(OptionlessCommand):
     description = 'Stage 2 of the build process'
@@ -699,4 +705,4 @@ class upload(OptionlessCommand):
             ('stage1', None),
             ('stage2', None),
             ('stage3', None)
         ]