mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
Pull from trunk
This commit is contained in:
commit
1d6a6586a9
@ -2,7 +2,7 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
__appname__ = 'calibre'
|
||||
__version__ = '0.4.138'
|
||||
__version__ = '0.4.139'
|
||||
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
|
||||
'''
|
||||
Various run time constants.
|
||||
|
@ -233,7 +233,7 @@ class RTFMetadataWriter(MetadataWriterPlugin):
|
||||
class MOBIMetadataWriter(MetadataWriterPlugin):
|
||||
|
||||
name = 'Set MOBI metadata'
|
||||
file_types = set(['mobi', 'prc'])
|
||||
file_types = set(['mobi', 'prc', 'azw'])
|
||||
description = _('Set metadata in %s files')%'MOBI'
|
||||
author = 'Marshall T. Vandegrift'
|
||||
|
||||
@ -246,4 +246,4 @@ plugins = [HTML2ZIP]
|
||||
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
||||
x.__name__.endswith('MetadataReader')]
|
||||
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
||||
x.__name__.endswith('MetadataWriter')]
|
||||
x.__name__.endswith('MetadataWriter')]
|
||||
|
@ -33,6 +33,7 @@ class CYBOOKG3(USBMS):
|
||||
|
||||
EBOOK_DIR_MAIN = "eBooks"
|
||||
EBOOK_DIR_CARD = "eBooks"
|
||||
THUMBNAIL_HEIGHT = 144
|
||||
SUPPORTS_SUB_DIRS = True
|
||||
|
||||
def upload_books(self, files, names, on_card=False, end_session=True,
|
||||
|
@ -30,7 +30,7 @@ def write_t2b(t2bfile, coverdata=None):
|
||||
if coverdata != None:
|
||||
coverdata = StringIO.StringIO(coverdata)
|
||||
cover = Image.open(coverdata).convert("L")
|
||||
cover.thumbnail((96, 144))
|
||||
cover.thumbnail((96, 144), Image.ANTIALIAS)
|
||||
t2bcover = Image.new('L', (96, 144), 'white')
|
||||
|
||||
x, y = cover.size
|
||||
|
@ -205,9 +205,8 @@ class HTMLProcessor(Processor, Rationalizer):
|
||||
def save(self):
|
||||
for meta in list(self.root.xpath('//meta')):
|
||||
meta.getparent().remove(meta)
|
||||
#for img in self.root.xpath('//img[@src]'):
|
||||
# self.convert_image(img)
|
||||
Processor.save(self)
|
||||
# Strip all comments since Adobe DE is petrified of them
|
||||
Processor.save(self, strip_comments=True)
|
||||
|
||||
def remove_first_image(self):
|
||||
images = self.root.xpath('//img')
|
||||
|
@ -331,9 +331,8 @@ class PreProcessor(object):
|
||||
# Convert all entities, since lxml doesn't handle them well
|
||||
(re.compile(r'&(\S+?);'), convert_entities),
|
||||
# Remove the <![if/endif tags inserted by everybody's darling, MS Word
|
||||
(re.compile(r'(?i)<{0,1}!\[(end){0,1}if[^>]*>'), lambda match: ''),
|
||||
# Strip all comments since Adobe DE is petrified of them
|
||||
(re.compile(r'<!--[^>]*>'), lambda match : ''),
|
||||
(re.compile(r'</{0,1}!\[(end){0,1}if\]{0,1}>', re.IGNORECASE),
|
||||
lambda match: ''),
|
||||
]
|
||||
|
||||
# Fix pdftohtml markup
|
||||
@ -447,7 +446,7 @@ class Parser(PreProcessor, LoggingInterface):
|
||||
def save_path(self):
|
||||
return os.path.join(self.tdir, self.htmlfile_map[self.htmlfile.path])
|
||||
|
||||
def save(self):
|
||||
def save(self, strip_comments=False):
|
||||
'''
|
||||
Save processed HTML into the content directory.
|
||||
Should be called after all HTML processing is finished.
|
||||
@ -458,7 +457,11 @@ class Parser(PreProcessor, LoggingInterface):
|
||||
svg.set('xmlns', 'http://www.w3.org/2000/svg')
|
||||
|
||||
ans = tostring(self.root, pretty_print=self.opts.pretty_print)
|
||||
ans = re.compile(r'<head>', re.IGNORECASE).sub('<head>\n\t<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />\n', ans[:1000])+ans[1000:]
|
||||
ans = re.compile(r'<head>', re.IGNORECASE).sub(
|
||||
'<head>\n\t<meta http-equiv="Content-Type" '
|
||||
'content="text/html; charset=utf-8" />\n', ans[:1000])+ans[1000:]
|
||||
if strip_comments:
|
||||
ans = re.compile(r'<!--.*?-->', re.DOTALL).sub('', ans)
|
||||
with open(self.save_path(), 'wb') as f:
|
||||
f.write(ans)
|
||||
return f.name
|
||||
@ -594,7 +597,7 @@ class Processor(Parser):
|
||||
mark = etree.Element('hr', style=page_break_before)
|
||||
elem.addprevious(mark)
|
||||
|
||||
def save(self):
|
||||
def save(self, strip_comments=False):
|
||||
style_path = os.path.splitext(os.path.basename(self.save_path()))[0]
|
||||
for i, sheet in enumerate([self.stylesheet, self.font_css, self.override_css]):
|
||||
if sheet is not None:
|
||||
@ -608,7 +611,7 @@ class Processor(Parser):
|
||||
if isinstance(raw, unicode):
|
||||
raw = raw.encode('utf-8')
|
||||
open(path, 'wb').write(raw)
|
||||
return Parser.save(self)
|
||||
return Parser.save(self, strip_comments=strip_comments)
|
||||
|
||||
def populate_toc(self, toc):
|
||||
'''
|
||||
|
@ -30,6 +30,7 @@ preferred_source_formats = [
|
||||
'XHTML',
|
||||
'PRC',
|
||||
'AZW',
|
||||
'FB2',
|
||||
'RTF',
|
||||
'PDF',
|
||||
'TXT',
|
||||
|
@ -38,6 +38,7 @@ def extract_embedded_content(doc):
|
||||
open(fname, 'wb').write(data)
|
||||
|
||||
def to_html(fb2file, tdir):
|
||||
fb2file = os.path.abspath(fb2file)
|
||||
cwd = os.getcwd()
|
||||
try:
|
||||
os.chdir(tdir)
|
||||
@ -52,7 +53,7 @@ def to_html(fb2file, tdir):
|
||||
result = transform(doc)
|
||||
open('index.html', 'wb').write(transform.tostring(result))
|
||||
try:
|
||||
mi = get_metadata(open(fb2file, 'rb'))
|
||||
mi = get_metadata(open(fb2file, 'rb'), 'fb2')
|
||||
except:
|
||||
mi = MetaInformation(None, None)
|
||||
if not mi.title:
|
||||
|
BIN
src/calibre/gui2/images/news/e_novine.png
Normal file
BIN
src/calibre/gui2/images/news/e_novine.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 295 B |
@ -114,10 +114,13 @@ sudo python -c "import urllib2; exec urllib2.urlopen('http://calibre.kovidgoyal.
|
||||
wget -O- http://calibre.kovidgoyal.net/downloads/${app}-${version}.tar.gz | tar xvz
|
||||
cd calibre*
|
||||
python setup.py build && sudo python setup.py install
|
||||
sudo calibre_postinstall
|
||||
</pre>
|
||||
Note that if your distribution does not have a
|
||||
correctly compiled libunrar.so, ${app} will not
|
||||
support rar files.
|
||||
support rar files. The calibre_postinstall step
|
||||
is required for device detection and integration
|
||||
with your desktop environment.
|
||||
</p>
|
||||
</div>
|
||||
</td>
|
||||
|
@ -5,10 +5,11 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
'''
|
||||
Contains the logic for parsing feeds.
|
||||
'''
|
||||
import time, logging, traceback, copy
|
||||
import time, logging, traceback, copy, re
|
||||
from datetime import datetime
|
||||
|
||||
from calibre.web.feeds.feedparser import parse
|
||||
from calibre import entity_to_unicode
|
||||
from lxml import html
|
||||
|
||||
class Article(object):
|
||||
@ -19,6 +20,11 @@ class Article(object):
|
||||
self.downloaded = False
|
||||
self.id = id
|
||||
self.title = title.strip() if title else title
|
||||
try:
|
||||
self.title = re.sub(r'&(\S+);',
|
||||
entity_to_unicode, self.title)
|
||||
except:
|
||||
pass
|
||||
self.url = url
|
||||
self.summary = summary
|
||||
if summary and not isinstance(summary, unicode):
|
||||
@ -37,6 +43,7 @@ class Article(object):
|
||||
self.date = published
|
||||
self.utctime = datetime(*self.date[:6])
|
||||
self.localtime = self.utctime + self.time_offset
|
||||
|
||||
|
||||
def __repr__(self):
|
||||
return \
|
||||
@ -91,7 +98,8 @@ class Feed(object):
|
||||
if len(self.articles) >= max_articles_per_feed:
|
||||
break
|
||||
self.parse_article(item)
|
||||
|
||||
|
||||
|
||||
def populate_from_preparsed_feed(self, title, articles, oldest_article=7,
|
||||
max_articles_per_feed=100):
|
||||
self.title = title if title else _('Unknown feed')
|
||||
|
@ -30,7 +30,8 @@ recipe_modules = ['recipe_' + r for r in (
|
||||
'honoluluadvertiser', 'starbulletin', 'exiled', 'indy_star', 'dna',
|
||||
'pobjeda', 'chicago_breaking_news', 'glasgow_herald', 'linuxdevices',
|
||||
'hindu', 'cincinnati_enquirer', 'physics_world', 'pressonline',
|
||||
'la_republica', 'physics_today',
|
||||
'la_republica', 'physics_today', 'chicago_tribune', 'e_novine',
|
||||
'al_jazeera', 'winsupersite',
|
||||
)]
|
||||
|
||||
import re, imp, inspect, time, os
|
||||
|
50
src/calibre/web/feeds/recipes/recipe_al_jazeera.py
Normal file
50
src/calibre/web/feeds/recipes/recipe_al_jazeera.py
Normal file
@ -0,0 +1,50 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
|
||||
'''
|
||||
aljazeera.net
|
||||
'''
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AlJazeera(BasicNewsRecipe):
    # Recipe for the English-language Al Jazeera RSS feed (aljazeera.net).
    # Only the main content <div> of each article page is kept, and inline
    # presentation attributes are removed before conversion.
    title                  = 'Al Jazeera in English'
    __author__             = 'Darko Miletic'
    description            = 'News from Middle East'
    publisher              = 'Al Jazeera'
    category               = 'news, politics, middle east'
    # One download at a time with a delay between requests.
    # NOTE(review): delay is presumably in seconds -- confirm against
    # BasicNewsRecipe.
    simultaneous_downloads = 1
    delay                  = 4
    oldest_article         = 1
    max_articles_per_feed  = 100
    no_stylesheets         = True
    encoding               = 'iso-8859-1'
    remove_javascript      = True
    use_embedded_content   = False

    html2lrf_options = [
        '--comment', description,
        '--category', category,
        '--publisher', publisher,
        '--ignore-tables',
    ]

    # The option is spelled ``linearize_tables`` (plural), as in the other
    # recipes that linearize tables for EPUB output.
    html2epub_options = (
        'publisher="' + publisher + '"\ncomments="' + description +
        '"\ntags="' + category + '"\nlinearize_tables=True'
    )

    keep_only_tags = [dict(name='div', attrs={'id':'ctl00_divContent'})]

    remove_tags = [
        dict(name=['object','link']),
        dict(name='td', attrs={'class':['MostActiveDescHeader','MostActiveDescBody']}),
    ]

    feeds = [(u'AL JAZEERA ENGLISH (AJE)', u'http://english.aljazeera.net/Services/Rss/?PostingId=2007731105943979989' )]

    def preprocess_html(self, soup):
        '''
        Remove the inline ``style`` and ``face`` attributes from every tag
        in `soup` that carries them and return the same soup object.
        '''
        for item in soup.findAll(style=True):
            del item['style']
        for item in soup.findAll(face=True):
            del item['face']
        return soup
|
||||
|
82
src/calibre/web/feeds/recipes/recipe_chicago_tribune.py
Normal file
82
src/calibre/web/feeds/recipes/recipe_chicago_tribune.py
Normal file
@ -0,0 +1,82 @@
|
||||
from __future__ import with_statement
|
||||
__license__ = 'GPL 3'
|
||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import re
|
||||
from urlparse import urlparse, urlunparse
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
from threading import RLock
|
||||
|
||||
class ChicagoTribune(BasicNewsRecipe):
    # Recipe for the Chicago Tribune RSS feeds. Articles are flagged as
    # obfuscated, so each one is fetched by get_obfuscated_article(), which
    # writes the usable HTML to a temporary file and returns its path.

    title       = 'Chicago Tribune'
    __author__  = 'Kovid Goyal'
    description = 'Politics, local and business news from Chicago'
    language    = _('English')
    use_embedded_content    = False
    articles_are_obfuscated = True
    remove_tags_before      = dict(name='h1')
    # Serializes get_obfuscated_article(); shared by all instances because it
    # is a class attribute.
    obfuctation_lock        = RLock()

    feeds = [
        ('Latest news', 'http://feeds.chicagotribune.com/chicagotribune/news/'),
        ('Local news', 'http://feeds.chicagotribune.com/chicagotribune/news/local/'),
        ('Nation/world', 'http://feeds.chicagotribune.com/chicagotribune/news/nationworld/'),
        ('Hot topics', 'http://feeds.chicagotribune.com/chicagotribune/hottopics/'),
        ('Most E-mailed stories', 'http://feeds.chicagotribune.com/chicagotribune/email/'),
        ('Opinion', 'http://feeds.chicagotribune.com/chicagotribune/opinion/'),
        ('Off Topic', 'http://feeds.chicagotribune.com/chicagotribune/offtopic/'),
        ('Politics', 'http://feeds.chicagotribune.com/chicagotribune/politics/'),
        ('Special Reports', 'http://feeds.chicagotribune.com/chicagotribune/special/'),
        ('Religion News', 'http://feeds.chicagotribune.com/chicagotribune/religion/'),
        ('Business news', 'http://feeds.chicagotribune.com/chicagotribune/business/'),
        ('Jobs and Careers', 'http://feeds.chicagotribune.com/chicagotribune/career/'),
        ('Local scene', 'http://feeds.chicagotribune.com/chicagohomes/localscene/'),
        ('Phil Rosenthal', 'http://feeds.chicagotribune.com/chicagotribune/rosenthal/'),
        ('Tech Buzz', 'http://feeds.chicagotribune.com/chicagotribune/techbuzz/'),
        ('Your Money', 'http://feeds.chicagotribune.com/chicagotribune/yourmoney/'),
        ('Jon Hilkevitch - Getting around', 'http://feeds.chicagotribune.com/chicagotribune/gettingaround/'),
        ('Jon Yates - What\'s your problem?', 'http://feeds.chicagotribune.com/chicagotribune/problem/'),
        ('Garrison Keillor', 'http://feeds.chicagotribune.com/chicagotribune/keillor/'),
        ('Marks Jarvis - On Money', 'http://feeds.chicagotribune.com/chicagotribune/marksjarvisonmoney/'),
        ('Sports', 'http://feeds.chicagotribune.com/chicagotribune/sports/'),
        ('Arts and Architecture', 'http://feeds.chicagotribune.com/chicagotribune/arts/'),
        ('Books', 'http://feeds.chicagotribune.com/chicagotribune/books/'),
        ('Magazine', 'http://feeds.chicagotribune.com/chicagotribune/magazine/'),
        ('Movies', 'http://feeds.chicagotribune.com/chicagotribune/movies/'),
        # NOTE(review): URL follows the naming pattern of the other feeds --
        # confirm that the music feed is published at this address.
        ('Music', 'http://feeds.chicagotribune.com/chicagotribune/music/'),
        ('TV', 'http://feeds.chicagotribune.com/chicagotribune/tv/'),
        ('Hypertext', 'http://feeds.chicagotribune.com/chicagotribune/hypertext/'),
        ('iPhone Blog', 'http://feeds.feedburner.com/redeye/iphoneblog'),
        ('Julie\'s Health Club', 'http://feeds.chicagotribune.com/chicagotribune_julieshealthclub/'),
    ]

    # Temporary files created by get_obfuscated_article(); references are
    # kept here so that the file objects stay reachable after being closed.
    temp_files = []

    def get_article_url(self, article):
        '''
        Return the URL for a feed entry, preferring ``feedburner_origlink``,
        then ``guid``, then ``link``.
        '''
        return article.get('feedburner_origlink', article.get('guid', article.get('link')))

    def get_obfuscated_article(self, url, logger):
        '''
        Download the article at `url` and store usable HTML in a temporary
        file.

        If the page has an image with ``alt="Print"`` whose parent contains a
        link, that link is resolved against the scheme and host of `url`,
        fetched, and used as the article (root-relative image sources are
        made absolute the same way). Otherwise a minimal document is built
        from the ``page-title`` element and the element whose class matches
        ``asset-content``.

        :param url: URL of the article page.
        :param logger: Unused by this implementation.
        :return: Path of the temporary file holding the UTF-8 encoded HTML.
        '''
        with self.obfuctation_lock:
            soup = self.index_to_soup(url)
            img = soup.find('img', alt='Print')
            # The print icon may be present without an accompanying link;
            # fall back to the inline content in that case instead of failing.
            a = img.parent.find('a', href=True) if img is not None else None
            if a is not None:
                purl = urlparse(url)
                xurl = urlunparse(purl[:2] + (a['href'], '', '', ''))
                soup = self.index_to_soup(xurl)
                for pic in soup.findAll('img', src=True):
                    if pic['src'].startswith('/'):
                        pic['src'] = urlunparse(purl[:2]+(pic['src'], '', '', ''))
                html = unicode(soup)
            else:
                h1 = soup.find(id='page-title')
                body = soup.find(attrs={'class':re.compile('asset-content')})
                # Skip missing parts rather than emitting the text "None".
                content = u''.join(unicode(part) for part in (h1, body)
                                   if part is not None)
                html = u'<html><head/><body>%s</body></html>'%content
            tfile = PersistentTemporaryFile('_chicago_tribune.xhtml')
            self.temp_files.append(tfile)
            try:
                tfile.write(html.encode('utf-8'))
            finally:
                # Close the file even if the write fails.
                tfile.close()
            return tfile.name
|
||||
|
58
src/calibre/web/feeds/recipes/recipe_e_novine.py
Normal file
58
src/calibre/web/feeds/recipes/recipe_e_novine.py
Normal file
@ -0,0 +1,58 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
|
||||
'''
|
||||
e-novine.com
|
||||
'''
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class E_novine(BasicNewsRecipe):
    # Recipe for the e-novine.com RSS feed. Only the main content <div> is
    # kept from each page; the page is tagged as Serbian (Latin script) and
    # inline styles are removed.
    title                 = 'E-Novine'
    __author__            = 'Darko Miletic'
    description           = 'News from Serbia'
    publisher             = 'E-novine'
    category              = 'news, politics, Balkans'
    oldest_article        = 1
    max_articles_per_feed = 100
    no_stylesheets        = True
    encoding              = 'cp1250'
    cover_url             = 'http://www.e-novine.com/slike/slike_3/r1/g2008/m03/y3165525326702598.jpg'
    remove_javascript     = True
    use_embedded_content  = False
    language              = _('Serbian')
    extra_css             = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'

    html2lrf_options = [
        '--comment', description,
        '--category', category,
        '--publisher', publisher,
    ]

    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"'

    # Replace U+0110 (D with stroke) by U+00D0 (Eth) in the downloaded text.
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    keep_only_tags = [dict(name='div', attrs={'id':['css_47_0_2844H']})]

    remove_tags = [dict(name=['object','link','embed','iframe'])]

    feeds = [(u'Sve vesti', u'http://www.e-novine.com/rss/e-novine.xml' )]

    def preprocess_html(self, soup):
        '''
        Tag the document as ``sr-Latn-ME``, remove inline ``style``
        attributes and drop the second child ``<div>`` of the main content
        container.

        The first child ``<div>`` of the container is detached, the ``<div>``
        that follows it is removed, and the first one is put back at the
        start of the container. Missing ``<div>`` elements are tolerated.

        :param soup: Parsed article page.
        :return: The same `soup` object, modified in place.
        '''
        soup.html['xml:lang'] = 'sr-Latn-ME'
        soup.html['lang']     = 'sr-Latn-ME'
        mtag = '<meta http-equiv="Content-Language" content="sr-Latn-ME"/>'
        soup.head.insert(0,mtag)
        for item in soup.findAll(style=True):
            del item['style']
        ftag = soup.find('div', attrs={'id':'css_47_0_2844H'})
        if ftag:
            first = ftag.div
            if first is not None:
                first.extract()
                # With the first <div> detached, ftag.div is now the next one.
                unwanted = ftag.div
                if unwanted is not None:
                    unwanted.extract()
                ftag.insert(0,first)
        return soup
|
@ -19,7 +19,7 @@ class Infobae(BasicNewsRecipe):
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
language = _('Spanish')
|
||||
encoding = 'iso-8859-1'
|
||||
encoding = 'cp1252'
|
||||
cover_url = 'http://www.infobae.com/imgs/header/header.gif'
|
||||
remove_javascript = True
|
||||
|
||||
@ -28,9 +28,10 @@ class Infobae(BasicNewsRecipe):
|
||||
, '--category' , category
|
||||
, '--publisher', publisher
|
||||
, '--ignore-tables'
|
||||
, '--ignore-colors'
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
||||
|
||||
remove_tags = [
|
||||
dict(name=['embed','link','object'])
|
||||
|
@ -6,8 +6,8 @@ __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
lasegunda.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class LaSegunda(BasicNewsRecipe):
|
||||
title = 'La Segunda'
|
||||
__author__ = 'Darko Miletic'
|
||||
@ -21,14 +21,16 @@ class LaSegunda(BasicNewsRecipe):
|
||||
encoding = 'cp1252'
|
||||
cover_url = 'http://www.lasegunda.com/imagenes/logotipo_lasegunda_Oli.gif'
|
||||
remove_javascript = True
|
||||
language = _('Spanish')
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
'--comment', description
|
||||
, '--category', category
|
||||
, '--publisher', publisher
|
||||
, '--ignore-tables'
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} "'
|
||||
|
||||
keep_only_tags = [dict(name='table')]
|
||||
|
||||
@ -52,10 +54,7 @@ class LaSegunda(BasicNewsRecipe):
|
||||
def preprocess_html(self, soup):
|
||||
mtag = '<meta http-equiv="Content-Language" content="es-CL"/>'
|
||||
soup.head.insert(0,mtag)
|
||||
for item in soup.findAll(name='table', width=True):
|
||||
del item['width']
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
return soup
|
||||
|
||||
language = _('Spanish')
|
@ -7,11 +7,10 @@ pagina12.com.ar
|
||||
'''
|
||||
|
||||
from calibre import strftime
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Pagina12(BasicNewsRecipe):
|
||||
title = u'Pagina/12'
|
||||
title = 'Pagina/12'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'Noticias de Argentina y el resto del mundo'
|
||||
publisher = 'La Pagina S.A.'
|
||||
@ -20,12 +19,14 @@ class Pagina12(BasicNewsRecipe):
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
encoding = 'cp1252'
|
||||
cover_url = strftime('http://www.pagina12.com.ar/fotos/%Y%m%d/diario/TAPAN.jpg')
|
||||
cover_url = strftime('http://www.pagina12.com.ar/fotos/%Y%m%d/diario/tapagn.jpg')
|
||||
remove_javascript = True
|
||||
use_embedded_content = False
|
||||
language = _('Spanish')
|
||||
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
'--comment', description
|
||||
, '--category', category
|
||||
, '--publisher', publisher
|
||||
]
|
||||
@ -50,5 +51,3 @@ class Pagina12(BasicNewsRecipe):
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
return soup
|
||||
|
||||
language = _('Spanish')
|
28
src/calibre/web/feeds/recipes/recipe_winsupersite.py
Normal file
28
src/calibre/web/feeds/recipes/recipe_winsupersite.py
Normal file
@ -0,0 +1,28 @@
|
||||
import re
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Winsupersite(BasicNewsRecipe):
    # Recipe for Paul Thurrott's "SuperSite for Windows" RSS feed.
    title                 = u'Supersite for Windows'
    description           = u'Paul Thurrott SuperSite for Windows'
    publisher             = 'Paul Thurrott'
    __author__            = 'Hypernova'
    language              = _('English')

    feeds = [(u'Supersite for Windows', u'http://www.winsupersite.com/supersite.xml')]

    oldest_article        = 30
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    remove_javascript     = True

    html2lrf_options      = ['--ignore-tables']
    html2epub_options     = 'linearize_tables = True'

    remove_tags_before    = dict(name='h1')

    # Cut everything from the "--Paul Thurrott" signature paragraph up to the
    # closing body tag, leaving only the closing tag itself.
    preprocess_regexps    = [
        (
            re.compile(r'<p>--Paul Thurrott.*</body>', re.DOTALL|re.IGNORECASE),
            lambda match: '</body>',
        ),
    ]

    def get_browser(self):
        '''
        Return a browser obtained from ``BasicNewsRecipe.get_browser`` after
        it has opened the site's front page once.
        '''
        # NOTE(review): the initial request presumably establishes session
        # state needed for later downloads -- confirm.
        browser = BasicNewsRecipe.get_browser()
        browser.open('http://www.winsupersite.com')
        return browser
|
12
upload.py
12
upload.py
@ -284,7 +284,13 @@ class gui(OptionlessCommand):
|
||||
manifest = '<RCC>\n<qresource prefix="/">\n%s\n</qresource>\n</RCC>'%'\n'.join(files)
|
||||
with open('images.qrc', 'wb') as f:
|
||||
f.write(manifest)
|
||||
check_call(['pyrcc4', '-o', images, 'images.qrc'])
|
||||
try:
|
||||
check_call(['pyrcc4', '-o', images, 'images.qrc'])
|
||||
except:
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
raise Exception('You do not have pyrcc4 in your PATH. '
|
||||
'Install the PyQt4 development tools.')
|
||||
else:
|
||||
print 'Images are up to date'
|
||||
finally:
|
||||
@ -670,7 +676,7 @@ class stage3(OptionlessCommand):
|
||||
|
||||
def run(self):
|
||||
OptionlessCommand.run(self)
|
||||
self.misc()
|
||||
self.misc()
|
||||
|
||||
class stage2(OptionlessCommand):
|
||||
description = 'Stage 2 of the build process'
|
||||
@ -699,4 +705,4 @@ class upload(OptionlessCommand):
|
||||
('stage1', None),
|
||||
('stage2', None),
|
||||
('stage3', None)
|
||||
]
|
||||
]
|
||||
|
Loading…
x
Reference in New Issue
Block a user