mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 10:14:46 -04:00
Pull from trunk
This commit is contained in:
commit
1d6a6586a9
@ -2,7 +2,7 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
__appname__ = 'calibre'
|
__appname__ = 'calibre'
|
||||||
__version__ = '0.4.138'
|
__version__ = '0.4.139'
|
||||||
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
|
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
|
||||||
'''
|
'''
|
||||||
Various run time constants.
|
Various run time constants.
|
||||||
|
@ -233,7 +233,7 @@ class RTFMetadataWriter(MetadataWriterPlugin):
|
|||||||
class MOBIMetadataWriter(MetadataWriterPlugin):
|
class MOBIMetadataWriter(MetadataWriterPlugin):
|
||||||
|
|
||||||
name = 'Set MOBI metadata'
|
name = 'Set MOBI metadata'
|
||||||
file_types = set(['mobi', 'prc'])
|
file_types = set(['mobi', 'prc', 'azw'])
|
||||||
description = _('Set metadata in %s files')%'MOBI'
|
description = _('Set metadata in %s files')%'MOBI'
|
||||||
author = 'Marshall T. Vandegrift'
|
author = 'Marshall T. Vandegrift'
|
||||||
|
|
||||||
|
@ -33,6 +33,7 @@ class CYBOOKG3(USBMS):
|
|||||||
|
|
||||||
EBOOK_DIR_MAIN = "eBooks"
|
EBOOK_DIR_MAIN = "eBooks"
|
||||||
EBOOK_DIR_CARD = "eBooks"
|
EBOOK_DIR_CARD = "eBooks"
|
||||||
|
THUMBNAIL_HEIGHT = 144
|
||||||
SUPPORTS_SUB_DIRS = True
|
SUPPORTS_SUB_DIRS = True
|
||||||
|
|
||||||
def upload_books(self, files, names, on_card=False, end_session=True,
|
def upload_books(self, files, names, on_card=False, end_session=True,
|
||||||
|
@ -30,7 +30,7 @@ def write_t2b(t2bfile, coverdata=None):
|
|||||||
if coverdata != None:
|
if coverdata != None:
|
||||||
coverdata = StringIO.StringIO(coverdata)
|
coverdata = StringIO.StringIO(coverdata)
|
||||||
cover = Image.open(coverdata).convert("L")
|
cover = Image.open(coverdata).convert("L")
|
||||||
cover.thumbnail((96, 144))
|
cover.thumbnail((96, 144), Image.ANTIALIAS)
|
||||||
t2bcover = Image.new('L', (96, 144), 'white')
|
t2bcover = Image.new('L', (96, 144), 'white')
|
||||||
|
|
||||||
x, y = cover.size
|
x, y = cover.size
|
||||||
|
@ -205,9 +205,8 @@ class HTMLProcessor(Processor, Rationalizer):
|
|||||||
def save(self):
|
def save(self):
|
||||||
for meta in list(self.root.xpath('//meta')):
|
for meta in list(self.root.xpath('//meta')):
|
||||||
meta.getparent().remove(meta)
|
meta.getparent().remove(meta)
|
||||||
#for img in self.root.xpath('//img[@src]'):
|
# Strip all comments since Adobe DE is petrified of them
|
||||||
# self.convert_image(img)
|
Processor.save(self, strip_comments=True)
|
||||||
Processor.save(self)
|
|
||||||
|
|
||||||
def remove_first_image(self):
|
def remove_first_image(self):
|
||||||
images = self.root.xpath('//img')
|
images = self.root.xpath('//img')
|
||||||
|
@ -331,9 +331,8 @@ class PreProcessor(object):
|
|||||||
# Convert all entities, since lxml doesn't handle them well
|
# Convert all entities, since lxml doesn't handle them well
|
||||||
(re.compile(r'&(\S+?);'), convert_entities),
|
(re.compile(r'&(\S+?);'), convert_entities),
|
||||||
# Remove the <![if/endif tags inserted by everybody's darling, MS Word
|
# Remove the <![if/endif tags inserted by everybody's darling, MS Word
|
||||||
(re.compile(r'(?i)<{0,1}!\[(end){0,1}if[^>]*>'), lambda match: ''),
|
(re.compile(r'</{0,1}!\[(end){0,1}if\]{0,1}>', re.IGNORECASE),
|
||||||
# Strip all comments since Adobe DE is petrified of them
|
lambda match: ''),
|
||||||
(re.compile(r'<!--[^>]*>'), lambda match : ''),
|
|
||||||
]
|
]
|
||||||
|
|
||||||
# Fix pdftohtml markup
|
# Fix pdftohtml markup
|
||||||
@ -447,7 +446,7 @@ class Parser(PreProcessor, LoggingInterface):
|
|||||||
def save_path(self):
|
def save_path(self):
|
||||||
return os.path.join(self.tdir, self.htmlfile_map[self.htmlfile.path])
|
return os.path.join(self.tdir, self.htmlfile_map[self.htmlfile.path])
|
||||||
|
|
||||||
def save(self):
|
def save(self, strip_comments=False):
|
||||||
'''
|
'''
|
||||||
Save processed HTML into the content directory.
|
Save processed HTML into the content directory.
|
||||||
Should be called after all HTML processing is finished.
|
Should be called after all HTML processing is finished.
|
||||||
@ -458,7 +457,11 @@ class Parser(PreProcessor, LoggingInterface):
|
|||||||
svg.set('xmlns', 'http://www.w3.org/2000/svg')
|
svg.set('xmlns', 'http://www.w3.org/2000/svg')
|
||||||
|
|
||||||
ans = tostring(self.root, pretty_print=self.opts.pretty_print)
|
ans = tostring(self.root, pretty_print=self.opts.pretty_print)
|
||||||
ans = re.compile(r'<head>', re.IGNORECASE).sub('<head>\n\t<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />\n', ans[:1000])+ans[1000:]
|
ans = re.compile(r'<head>', re.IGNORECASE).sub(
|
||||||
|
'<head>\n\t<meta http-equiv="Content-Type" '
|
||||||
|
'content="text/html; charset=utf-8" />\n', ans[:1000])+ans[1000:]
|
||||||
|
if strip_comments:
|
||||||
|
ans = re.compile(r'<!--.*?-->', re.DOTALL).sub('', ans)
|
||||||
with open(self.save_path(), 'wb') as f:
|
with open(self.save_path(), 'wb') as f:
|
||||||
f.write(ans)
|
f.write(ans)
|
||||||
return f.name
|
return f.name
|
||||||
@ -594,7 +597,7 @@ class Processor(Parser):
|
|||||||
mark = etree.Element('hr', style=page_break_before)
|
mark = etree.Element('hr', style=page_break_before)
|
||||||
elem.addprevious(mark)
|
elem.addprevious(mark)
|
||||||
|
|
||||||
def save(self):
|
def save(self, strip_comments=False):
|
||||||
style_path = os.path.splitext(os.path.basename(self.save_path()))[0]
|
style_path = os.path.splitext(os.path.basename(self.save_path()))[0]
|
||||||
for i, sheet in enumerate([self.stylesheet, self.font_css, self.override_css]):
|
for i, sheet in enumerate([self.stylesheet, self.font_css, self.override_css]):
|
||||||
if sheet is not None:
|
if sheet is not None:
|
||||||
@ -608,7 +611,7 @@ class Processor(Parser):
|
|||||||
if isinstance(raw, unicode):
|
if isinstance(raw, unicode):
|
||||||
raw = raw.encode('utf-8')
|
raw = raw.encode('utf-8')
|
||||||
open(path, 'wb').write(raw)
|
open(path, 'wb').write(raw)
|
||||||
return Parser.save(self)
|
return Parser.save(self, strip_comments=strip_comments)
|
||||||
|
|
||||||
def populate_toc(self, toc):
|
def populate_toc(self, toc):
|
||||||
'''
|
'''
|
||||||
|
@ -30,6 +30,7 @@ preferred_source_formats = [
|
|||||||
'XHTML',
|
'XHTML',
|
||||||
'PRC',
|
'PRC',
|
||||||
'AZW',
|
'AZW',
|
||||||
|
'FB2',
|
||||||
'RTF',
|
'RTF',
|
||||||
'PDF',
|
'PDF',
|
||||||
'TXT',
|
'TXT',
|
||||||
|
@ -38,6 +38,7 @@ def extract_embedded_content(doc):
|
|||||||
open(fname, 'wb').write(data)
|
open(fname, 'wb').write(data)
|
||||||
|
|
||||||
def to_html(fb2file, tdir):
|
def to_html(fb2file, tdir):
|
||||||
|
fb2file = os.path.abspath(fb2file)
|
||||||
cwd = os.getcwd()
|
cwd = os.getcwd()
|
||||||
try:
|
try:
|
||||||
os.chdir(tdir)
|
os.chdir(tdir)
|
||||||
@ -52,7 +53,7 @@ def to_html(fb2file, tdir):
|
|||||||
result = transform(doc)
|
result = transform(doc)
|
||||||
open('index.html', 'wb').write(transform.tostring(result))
|
open('index.html', 'wb').write(transform.tostring(result))
|
||||||
try:
|
try:
|
||||||
mi = get_metadata(open(fb2file, 'rb'))
|
mi = get_metadata(open(fb2file, 'rb'), 'fb2')
|
||||||
except:
|
except:
|
||||||
mi = MetaInformation(None, None)
|
mi = MetaInformation(None, None)
|
||||||
if not mi.title:
|
if not mi.title:
|
||||||
|
BIN
src/calibre/gui2/images/news/e_novine.png
Normal file
BIN
src/calibre/gui2/images/news/e_novine.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 295 B |
@ -114,10 +114,13 @@ sudo python -c "import urllib2; exec urllib2.urlopen('http://calibre.kovidgoyal.
|
|||||||
wget -O- http://calibre.kovidgoyal.net/downloads/${app}-${version}.tar.gz | tar xvz
|
wget -O- http://calibre.kovidgoyal.net/downloads/${app}-${version}.tar.gz | tar xvz
|
||||||
cd calibre*
|
cd calibre*
|
||||||
python setup.py build && sudo python setup.py install
|
python setup.py build && sudo python setup.py install
|
||||||
|
sudo calibre_postinstall
|
||||||
</pre>
|
</pre>
|
||||||
Note that if your distribution does not have a
|
Note that if your distribution does not have a
|
||||||
correctly compiled libunrar.so, ${app} will not
|
correctly compiled libunrar.so, ${app} will not
|
||||||
support rar files.
|
support rar files. The calibre_postinstall step
|
||||||
|
is required for device detection and integration
|
||||||
|
with your desktop environment.
|
||||||
</p>
|
</p>
|
||||||
</div>
|
</div>
|
||||||
</td>
|
</td>
|
||||||
|
@ -5,10 +5,11 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
|||||||
'''
|
'''
|
||||||
Contains the logic for parsing feeds.
|
Contains the logic for parsing feeds.
|
||||||
'''
|
'''
|
||||||
import time, logging, traceback, copy
|
import time, logging, traceback, copy, re
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
from calibre.web.feeds.feedparser import parse
|
from calibre.web.feeds.feedparser import parse
|
||||||
|
from calibre import entity_to_unicode
|
||||||
from lxml import html
|
from lxml import html
|
||||||
|
|
||||||
class Article(object):
|
class Article(object):
|
||||||
@ -19,6 +20,11 @@ class Article(object):
|
|||||||
self.downloaded = False
|
self.downloaded = False
|
||||||
self.id = id
|
self.id = id
|
||||||
self.title = title.strip() if title else title
|
self.title = title.strip() if title else title
|
||||||
|
try:
|
||||||
|
self.title = re.sub(r'&(\S+);',
|
||||||
|
entity_to_unicode, self.title)
|
||||||
|
except:
|
||||||
|
pass
|
||||||
self.url = url
|
self.url = url
|
||||||
self.summary = summary
|
self.summary = summary
|
||||||
if summary and not isinstance(summary, unicode):
|
if summary and not isinstance(summary, unicode):
|
||||||
@ -38,6 +44,7 @@ class Article(object):
|
|||||||
self.utctime = datetime(*self.date[:6])
|
self.utctime = datetime(*self.date[:6])
|
||||||
self.localtime = self.utctime + self.time_offset
|
self.localtime = self.utctime + self.time_offset
|
||||||
|
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return \
|
return \
|
||||||
(u'''\
|
(u'''\
|
||||||
@ -92,6 +99,7 @@ class Feed(object):
|
|||||||
break
|
break
|
||||||
self.parse_article(item)
|
self.parse_article(item)
|
||||||
|
|
||||||
|
|
||||||
def populate_from_preparsed_feed(self, title, articles, oldest_article=7,
|
def populate_from_preparsed_feed(self, title, articles, oldest_article=7,
|
||||||
max_articles_per_feed=100):
|
max_articles_per_feed=100):
|
||||||
self.title = title if title else _('Unknown feed')
|
self.title = title if title else _('Unknown feed')
|
||||||
|
@ -30,7 +30,8 @@ recipe_modules = ['recipe_' + r for r in (
|
|||||||
'honoluluadvertiser', 'starbulletin', 'exiled', 'indy_star', 'dna',
|
'honoluluadvertiser', 'starbulletin', 'exiled', 'indy_star', 'dna',
|
||||||
'pobjeda', 'chicago_breaking_news', 'glasgow_herald', 'linuxdevices',
|
'pobjeda', 'chicago_breaking_news', 'glasgow_herald', 'linuxdevices',
|
||||||
'hindu', 'cincinnati_enquirer', 'physics_world', 'pressonline',
|
'hindu', 'cincinnati_enquirer', 'physics_world', 'pressonline',
|
||||||
'la_republica', 'physics_today',
|
'la_republica', 'physics_today', 'chicago_tribune', 'e_novine',
|
||||||
|
'al_jazeera', 'winsupersite',
|
||||||
)]
|
)]
|
||||||
|
|
||||||
import re, imp, inspect, time, os
|
import re, imp, inspect, time, os
|
||||||
|
50
src/calibre/web/feeds/recipes/recipe_al_jazeera.py
Normal file
50
src/calibre/web/feeds/recipes/recipe_al_jazeera.py
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
|
||||||
|
'''
|
||||||
|
aljazeera.net
|
||||||
|
'''
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class AlJazeera(BasicNewsRecipe):
    '''
    Recipe for the English-language Al Jazeera RSS feed (aljazeera.net).
    '''

    title                  = 'Al Jazeera in English'
    __author__             = 'Darko Miletic'
    description            = 'News from Middle East'
    publisher              = 'Al Jazeera'
    category               = 'news, politics, middle east'
    simultaneous_downloads = 1
    delay                  = 4
    oldest_article         = 1
    max_articles_per_feed  = 100
    no_stylesheets         = True
    encoding               = 'iso-8859-1'
    remove_javascript      = True
    use_embedded_content   = False

    # Command line style options handed to the LRF conversion.
    html2lrf_options = [
        '--comment', description,
        '--category', category,
        '--publisher', publisher,
        '--ignore-tables',
    ]

    # Option string handed to the EPUB conversion. The option is spelled
    # ``linearize_tables`` (plural), as in the other recipes of this
    # package; the singular form used previously did not match it.
    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'

    # Only the main content container of the page is kept.
    keep_only_tags = [dict(name='div', attrs={'id':'ctl00_divContent'})]

    remove_tags = [
        dict(name=['object','link']),
        dict(name='td', attrs={'class':['MostActiveDescHeader','MostActiveDescBody']}),
    ]

    feeds = [(u'AL JAZEERA ENGLISH (AJE)', u'http://english.aljazeera.net/Services/Rss/?PostingId=2007731105943979989' )]

    def preprocess_html(self, soup):
        '''
        Strip presentational ``style`` and ``face`` attributes from every
        tag of `soup` and return the same (modified) soup object.
        '''
        for attr in ('style', 'face'):
            for item in soup.findAll(**{attr: True}):
                del item[attr]
        return soup
|
||||||
|
|
82
src/calibre/web/feeds/recipes/recipe_chicago_tribune.py
Normal file
82
src/calibre/web/feeds/recipes/recipe_chicago_tribune.py
Normal file
@ -0,0 +1,82 @@
|
|||||||
|
from __future__ import with_statement
|
||||||
|
__license__ = 'GPL 3'
|
||||||
|
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import re
|
||||||
|
from urlparse import urlparse, urlunparse
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
from calibre.ptempfile import PersistentTemporaryFile
|
||||||
|
from threading import RLock
|
||||||
|
|
||||||
|
class ChicagoTribune(BasicNewsRecipe):
    '''
    Recipe for the Chicago Tribune RSS feeds.

    The recipe sets ``articles_are_obfuscated`` and supplies
    :meth:`get_obfuscated_article`, which downloads each article
    (preferring the page behind the "Print" link) into a temporary file.
    '''

    title       = 'Chicago Tribune'
    __author__  = 'Kovid Goyal'
    description = 'Politics, local and business news from Chicago'
    language    = _('English')
    use_embedded_content    = False
    articles_are_obfuscated = True
    remove_tags_before      = dict(name='h1')
    # Held for the whole of get_obfuscated_article(). The attribute name
    # is misspelled but is left unchanged so existing references keep
    # working.
    obfuctation_lock        = RLock()

    feeds = [
        ('Latest news', 'http://feeds.chicagotribune.com/chicagotribune/news/'),
        ('Local news', 'http://feeds.chicagotribune.com/chicagotribune/news/local/'),
        ('Nation/world', 'http://feeds.chicagotribune.com/chicagotribune/news/nationworld/'),
        ('Hot topics', 'http://feeds.chicagotribune.com/chicagotribune/hottopics/'),
        ('Most E-mailed stories', 'http://feeds.chicagotribune.com/chicagotribune/email/'),
        ('Opinion', 'http://feeds.chicagotribune.com/chicagotribune/opinion/'),
        ('Off Topic', 'http://feeds.chicagotribune.com/chicagotribune/offtopic/'),
        ('Politics', 'http://feeds.chicagotribune.com/chicagotribune/politics/'),
        ('Special Reports', 'http://feeds.chicagotribune.com/chicagotribune/special/'),
        ('Religion News', 'http://feeds.chicagotribune.com/chicagotribune/religion/'),
        ('Business news', 'http://feeds.chicagotribune.com/chicagotribune/business/'),
        ('Jobs and Careers', 'http://feeds.chicagotribune.com/chicagotribune/career/'),
        ('Local scene', 'http://feeds.chicagotribune.com/chicagohomes/localscene/'),
        ('Phil Rosenthal', 'http://feeds.chicagotribune.com/chicagotribune/rosenthal/'),
        ('Tech Buzz', 'http://feeds.chicagotribune.com/chicagotribune/techbuzz/'),
        ('Your Money', 'http://feeds.chicagotribune.com/chicagotribune/yourmoney/'),
        ('Jon Hilkevitch - Getting around', 'http://feeds.chicagotribune.com/chicagotribune/gettingaround/'),
        ('Jon Yates - What\'s your problem?', 'http://feeds.chicagotribune.com/chicagotribune/problem/'),
        ('Garisson Keillor', 'http://feeds.chicagotribune.com/chicagotribune/keillor/'),
        ('Marks Jarvis - On Money', 'http://feeds.chicagotribune.com/chicagotribune/marksjarvisonmoney/'),
        ('Sports', 'http://feeds.chicagotribune.com/chicagotribune/sports/'),
        ('Arts and Architecture', 'http://feeds.chicagotribune.com/chicagotribune/arts/'),
        ('Books', 'http://feeds.chicagotribune.com/chicagotribune/books/'),
        ('Magazine', 'http://feeds.chicagotribune.com/chicagotribune/magazine/'),
        ('Movies', 'http://feeds.chicagotribune.com/chicagotribune/movies/'),
        # NOTE(review): 'Music' points at the same URL as 'Movies', which
        # looks like a copy/paste slip -- confirm the music feed URL.
        ('Music', 'http://feeds.chicagotribune.com/chicagotribune/movies/'),
        ('TV', 'http://feeds.chicagotribune.com/chicagotribune/tv/'),
        ('Hypertext', 'http://feeds.chicagotribune.com/chicagotribune/hypertext/'),
        ('iPhone Blog', 'http://feeds.feedburner.com/redeye/iphoneblog'),
        ('Julie\'s Health Club', 'http://feeds.chicagotribune.com/chicagotribune_julieshealthclub/'),
    ]

    # Class-level list (shared by all instances) of the temporary files
    # created by get_obfuscated_article().
    temp_files = []

    def get_article_url(self, article):
        '''
        Return the URL to download for the feed entry `article`: its
        ``feedburner_origlink`` if present, otherwise its ``guid``,
        otherwise its ``link``.
        '''
        return article.get('feedburner_origlink', article.get('guid', article.get('link')))

    def get_obfuscated_article(self, url, logger):
        '''
        Download the article at `url` into a temporary XHTML file and
        return the path of that file.

        If the page has an image with alt text "Print" whose parent
        contains a link, the linked page is downloaded and used, with
        root-relative image sources made absolute. Otherwise a minimal
        document is assembled from the element with id ``page-title`` and
        the element whose class matches ``asset-content``; whichever of
        the two is missing is simply left out.

        `logger` is accepted for interface compatibility and not used.
        '''
        with self.obfuctation_lock:
            soup = self.index_to_soup(url)
            print_link = None
            print_img = soup.find('img', alt='Print')
            if print_img is not None and print_img.parent is not None:
                print_link = print_img.parent.find('a', href=True)

            if print_link is not None:
                # Reuse scheme and host of the article URL, take the path
                # from the print link.
                purl = urlparse(url)
                xurl = urlunparse(purl[:2] + (print_link['href'], '', '', ''))
                soup = self.index_to_soup(xurl)
                for img in soup.findAll('img', src=True):
                    if img['src'].startswith('/'):
                        img['src'] = urlunparse(purl[:2]+(img['src'], '', '', ''))
                html = unicode(soup)
            else:
                # No usable print version (this includes a "Print" image
                # without an accompanying link, which used to raise a
                # TypeError): fall back to title + body of this page.
                h1 = soup.find(id='page-title')
                body = soup.find(attrs={'class':re.compile('asset-content')})
                # Skip missing pieces instead of embedding the text "None".
                parts = [unicode(tag) for tag in (h1, body) if tag is not None]
                html = u'<html><head/><body>%s</body></html>'%u''.join(parts)

            tfile = PersistentTemporaryFile('_chicago_tribune.xhtml')
            self.temp_files.append(tfile)
            tfile.write(html.encode('utf-8'))
            tfile.close()
            return tfile.name
|
||||||
|
|
58
src/calibre/web/feeds/recipes/recipe_e_novine.py
Normal file
58
src/calibre/web/feeds/recipes/recipe_e_novine.py
Normal file
@ -0,0 +1,58 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
|
||||||
|
'''
|
||||||
|
e-novine.com
|
||||||
|
'''
|
||||||
|
|
||||||
|
import re
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class E_novine(BasicNewsRecipe):
    '''
    Recipe for the e-novine.com RSS feed.
    '''

    title                 = 'E-Novine'
    __author__            = 'Darko Miletic'
    description           = 'News from Serbia'
    publisher             = 'E-novine'
    category              = 'news, politics, Balcans'
    oldest_article        = 1
    max_articles_per_feed = 100
    no_stylesheets        = True
    encoding              = 'cp1250'
    cover_url             = 'http://www.e-novine.com/slike/slike_3/r1/g2008/m03/y3165525326702598.jpg'
    remove_javascript     = True
    use_embedded_content  = False
    language              = _('Serbian')
    extra_css             = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'

    # Command line style options handed to the LRF conversion.
    html2lrf_options = [
        '--comment', description,
        '--category', category,
        '--publisher', publisher,
    ]

    # Option string handed to the EPUB conversion.
    html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"'

    # Replace U+0110 with U+00D0 in the downloaded markup.
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    keep_only_tags = [dict(name='div', attrs={'id':['css_47_0_2844H']})]

    remove_tags = [dict(name=['object','link','embed','iframe'])]

    feeds = [(u'Sve vesti', u'http://www.e-novine.com/rss/e-novine.xml' )]

    def preprocess_html(self, soup):
        '''
        Tag the document with its language, strip inline styles and
        rearrange the article container; return the modified `soup`.

        Inside the ``css_47_0_2844H`` div, the first div is detached, the
        div that then comes first is removed, and the detached div is put
        back as the first child. Containers with fewer divs than that are
        handled without raising.
        '''
        soup.html['xml:lang'] = 'sr-Latn-ME'
        soup.html['lang']     = 'sr-Latn-ME'
        mtag = '<meta http-equiv="Content-Language" content="sr-Latn-ME"/>'
        soup.head.insert(0,mtag)
        for item in soup.findAll(style=True):
            del item['style']
        ftag = soup.find('div', attrs={'id':'css_47_0_2844H'})
        if ftag:
            first = ftag.div
            # ftag.div is None when the container holds no div at all;
            # calling extract() on it used to raise AttributeError.
            if first is not None:
                first.extract()
                unwanted = ftag.div
                if unwanted is not None:
                    unwanted.extract()
                ftag.insert(0,first)
        return soup
|
@ -19,7 +19,7 @@ class Infobae(BasicNewsRecipe):
|
|||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
language = _('Spanish')
|
language = _('Spanish')
|
||||||
encoding = 'iso-8859-1'
|
encoding = 'cp1252'
|
||||||
cover_url = 'http://www.infobae.com/imgs/header/header.gif'
|
cover_url = 'http://www.infobae.com/imgs/header/header.gif'
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
|
|
||||||
@ -28,6 +28,7 @@ class Infobae(BasicNewsRecipe):
|
|||||||
, '--category' , category
|
, '--category' , category
|
||||||
, '--publisher', publisher
|
, '--publisher', publisher
|
||||||
, '--ignore-tables'
|
, '--ignore-tables'
|
||||||
|
, '--ignore-colors'
|
||||||
]
|
]
|
||||||
|
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
||||||
|
@ -21,14 +21,16 @@ class LaSegunda(BasicNewsRecipe):
|
|||||||
encoding = 'cp1252'
|
encoding = 'cp1252'
|
||||||
cover_url = 'http://www.lasegunda.com/imagenes/logotipo_lasegunda_Oli.gif'
|
cover_url = 'http://www.lasegunda.com/imagenes/logotipo_lasegunda_Oli.gif'
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
|
language = _('Spanish')
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment', description
|
'--comment', description
|
||||||
, '--category', category
|
, '--category', category
|
||||||
, '--publisher', publisher
|
, '--publisher', publisher
|
||||||
|
, '--ignore-tables'
|
||||||
]
|
]
|
||||||
|
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} "'
|
||||||
|
|
||||||
keep_only_tags = [dict(name='table')]
|
keep_only_tags = [dict(name='table')]
|
||||||
|
|
||||||
@ -52,10 +54,7 @@ class LaSegunda(BasicNewsRecipe):
|
|||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
mtag = '<meta http-equiv="Content-Language" content="es-CL"/>'
|
mtag = '<meta http-equiv="Content-Language" content="es-CL"/>'
|
||||||
soup.head.insert(0,mtag)
|
soup.head.insert(0,mtag)
|
||||||
for item in soup.findAll(name='table', width=True):
|
|
||||||
del item['width']
|
|
||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
del item['style']
|
del item['style']
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
language = _('Spanish')
|
|
@ -7,11 +7,10 @@ pagina12.com.ar
|
|||||||
'''
|
'''
|
||||||
|
|
||||||
from calibre import strftime
|
from calibre import strftime
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class Pagina12(BasicNewsRecipe):
|
class Pagina12(BasicNewsRecipe):
|
||||||
title = u'Pagina/12'
|
title = 'Pagina/12'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = 'Noticias de Argentina y el resto del mundo'
|
description = 'Noticias de Argentina y el resto del mundo'
|
||||||
publisher = 'La Pagina S.A.'
|
publisher = 'La Pagina S.A.'
|
||||||
@ -20,9 +19,11 @@ class Pagina12(BasicNewsRecipe):
|
|||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
encoding = 'cp1252'
|
encoding = 'cp1252'
|
||||||
cover_url = strftime('http://www.pagina12.com.ar/fotos/%Y%m%d/diario/TAPAN.jpg')
|
cover_url = strftime('http://www.pagina12.com.ar/fotos/%Y%m%d/diario/tapagn.jpg')
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
|
language = _('Spanish')
|
||||||
|
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment', description
|
'--comment', description
|
||||||
@ -50,5 +51,3 @@ class Pagina12(BasicNewsRecipe):
|
|||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
del item['style']
|
del item['style']
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
language = _('Spanish')
|
|
28
src/calibre/web/feeds/recipes/recipe_winsupersite.py
Normal file
28
src/calibre/web/feeds/recipes/recipe_winsupersite.py
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
import re
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class Winsupersite(BasicNewsRecipe):
    '''
    Recipe for Paul Thurrott's SuperSite for Windows feed.
    '''

    # Descriptive metadata.
    title       = u'Supersite for Windows'
    description = u'Paul Thurrott SuperSite for Windows'
    publisher   = 'Paul Thurrott'
    __author__  = 'Hypernova'
    language    = _('English')

    # Where to fetch from and how much.
    feeds = [(u'Supersite for Windows', u'http://www.winsupersite.com/supersite.xml')]
    oldest_article        = 30
    max_articles_per_feed = 100
    use_embedded_content  = False

    # Clean-up of the downloaded pages.
    no_stylesheets     = True
    remove_javascript  = True
    remove_tags_before = dict(name='h1')
    preprocess_regexps = [
        # Cut everything from the "--Paul Thurrott" paragraph up to the
        # closing body tag.
        (
            re.compile(r'<p>--Paul Thurrott.*</body>', re.DOTALL|re.IGNORECASE),
            lambda match: '</body>',
        ),
    ]

    # Conversion options.
    html2lrf_options  = ['--ignore-tables']
    html2epub_options = 'linearize_tables = True'

    def get_browser(self):
        '''
        Return a browser that has already opened the site's front page.
        '''
        # NOTE(review): get_browser is invoked on the class without an
        # instance; this presumably relies on the base implementation
        # being a classmethod -- confirm against BasicNewsRecipe.
        browser = BasicNewsRecipe.get_browser()
        browser.open('http://www.winsupersite.com')
        return browser
|
@ -284,7 +284,13 @@ class gui(OptionlessCommand):
|
|||||||
manifest = '<RCC>\n<qresource prefix="/">\n%s\n</qresource>\n</RCC>'%'\n'.join(files)
|
manifest = '<RCC>\n<qresource prefix="/">\n%s\n</qresource>\n</RCC>'%'\n'.join(files)
|
||||||
with open('images.qrc', 'wb') as f:
|
with open('images.qrc', 'wb') as f:
|
||||||
f.write(manifest)
|
f.write(manifest)
|
||||||
|
try:
|
||||||
check_call(['pyrcc4', '-o', images, 'images.qrc'])
|
check_call(['pyrcc4', '-o', images, 'images.qrc'])
|
||||||
|
except:
|
||||||
|
import traceback
|
||||||
|
traceback.print_exc()
|
||||||
|
raise Exception('You do not have pyrcc4 in your PATH. '
|
||||||
|
'Install the PyQt4 development tools.')
|
||||||
else:
|
else:
|
||||||
print 'Images are up to date'
|
print 'Images are up to date'
|
||||||
finally:
|
finally:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user