From 29d842db3ed632afc4402a9537d444e68b27d1d5 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Fri, 20 Feb 2009 09:28:47 -0800
Subject: [PATCH 01/14] IGN:Fix #1901 (Recipe update for EPUB generation)
---
src/calibre/trac/plugins/templates/linux.html | 5 ++++-
src/calibre/web/feeds/recipes/recipe_la_segunda.py | 13 ++++++-------
upload.py | 8 +++++++-
3 files changed, 17 insertions(+), 9 deletions(-)
diff --git a/src/calibre/trac/plugins/templates/linux.html b/src/calibre/trac/plugins/templates/linux.html
index 5f0f287737..066f3c9b6d 100644
--- a/src/calibre/trac/plugins/templates/linux.html
+++ b/src/calibre/trac/plugins/templates/linux.html
@@ -114,10 +114,13 @@ sudo python -c "import urllib2; exec urllib2.urlopen('http://calibre.kovidgoyal.
wget -O- http://calibre.kovidgoyal.net/downloads/${app}-${version}.tar.gz | tar xvz
cd calibre*
python setup.py build && sudo python setup.py install
+sudo calibre_postinstall
Note that if your distribution does not have a
correctly compiled libunrar.so, ${app} will not
- support rar files.
+ support rar files. The calibre_postinstall step
+ is required for device detection and integration
+ with your desktop environment.
diff --git a/src/calibre/web/feeds/recipes/recipe_la_segunda.py b/src/calibre/web/feeds/recipes/recipe_la_segunda.py
index d049d9c92b..5852e6ba9a 100644
--- a/src/calibre/web/feeds/recipes/recipe_la_segunda.py
+++ b/src/calibre/web/feeds/recipes/recipe_la_segunda.py
@@ -6,8 +6,8 @@ __copyright__ = '2009, Darko Miletic '
lasegunda.com
'''
-from calibre.web.feeds.news import BasicNewsRecipe
-
+from calibre.web.feeds.news import BasicNewsRecipe
+
class LaSegunda(BasicNewsRecipe):
title = 'La Segunda'
__author__ = 'Darko Miletic'
@@ -21,14 +21,16 @@ class LaSegunda(BasicNewsRecipe):
encoding = 'cp1252'
cover_url = 'http://www.lasegunda.com/imagenes/logotipo_lasegunda_Oli.gif'
remove_javascript = True
+ language = _('Spanish')
html2lrf_options = [
- '--comment', description
+ '--comment', description
, '--category', category
, '--publisher', publisher
+ , '--ignore-tables'
]
- html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
+ html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} "'
keep_only_tags = [dict(name='table')]
@@ -52,10 +54,7 @@ class LaSegunda(BasicNewsRecipe):
def preprocess_html(self, soup):
mtag = ''
soup.head.insert(0,mtag)
- for item in soup.findAll(name='table', width=True):
- del item['width']
for item in soup.findAll(style=True):
del item['style']
return soup
- language = _('Spanish')
\ No newline at end of file
diff --git a/upload.py b/upload.py
index 5bd473e08d..2aeb1461ee 100644
--- a/upload.py
+++ b/upload.py
@@ -284,7 +284,13 @@ class gui(OptionlessCommand):
manifest = '\n\n%s\n\n'%'\n'.join(files)
with open('images.qrc', 'wb') as f:
f.write(manifest)
- check_call(['pyrcc4', '-o', images, 'images.qrc'])
+ try:
+ check_call(['pyrcc4', '-o', images, 'images.qrc'])
+ except:
+ import traceback
+ traceback.print_exc()
+ raise Exception('You do not have pyrcc4 in your PATH. '
+ 'Install the PyQt4 development tools.')
else:
print 'Images are up to date'
finally:
From 8e248482aa5c7a0777cda9b5117fb305e836f04c Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Fri, 20 Feb 2009 10:17:20 -0800
Subject: [PATCH 02/14] EPUB Output: Fix regression in handling of comments
that would occasionally cause content in the comments to leak into the text
---
src/calibre/ebooks/epub/from_html.py | 5 ++---
src/calibre/ebooks/html.py | 14 ++++++++------
2 files changed, 10 insertions(+), 9 deletions(-)
diff --git a/src/calibre/ebooks/epub/from_html.py b/src/calibre/ebooks/epub/from_html.py
index afd0af73d7..ffe402538f 100644
--- a/src/calibre/ebooks/epub/from_html.py
+++ b/src/calibre/ebooks/epub/from_html.py
@@ -205,9 +205,8 @@ class HTMLProcessor(Processor, Rationalizer):
def save(self):
for meta in list(self.root.xpath('//meta')):
meta.getparent().remove(meta)
- #for img in self.root.xpath('//img[@src]'):
- # self.convert_image(img)
- Processor.save(self)
+ # Strip all comments since Adobe DE is petrified of them
+ Processor.save(self, strip_comments=True)
def remove_first_image(self):
images = self.root.xpath('//img')
diff --git a/src/calibre/ebooks/html.py b/src/calibre/ebooks/html.py
index 340f5636eb..1c15973d3b 100644
--- a/src/calibre/ebooks/html.py
+++ b/src/calibre/ebooks/html.py
@@ -332,8 +332,6 @@ class PreProcessor(object):
(re.compile(r'&(\S+?);'), convert_entities),
# Remove the ]*>'), lambda match: ''),
- # Strip all comments since Adobe DE is petrified of them
- (re.compile(r'', re.DOTALL).sub('', ans)
with open(self.save_path(), 'wb') as f:
f.write(ans)
return f.name
@@ -594,7 +596,7 @@ class Processor(Parser):
mark = etree.Element('hr', style=page_break_before)
elem.addprevious(mark)
- def save(self):
+ def save(self, strip_comments=False):
style_path = os.path.splitext(os.path.basename(self.save_path()))[0]
for i, sheet in enumerate([self.stylesheet, self.font_css, self.override_css]):
if sheet is not None:
@@ -608,7 +610,7 @@ class Processor(Parser):
if isinstance(raw, unicode):
raw = raw.encode('utf-8')
open(path, 'wb').write(raw)
- return Parser.save(self)
+ return Parser.save(self, strip_comments=strip_comments)
def populate_toc(self, toc):
'''
From af8f3b56ce86a33ee8e0ec8f8e909ce2c0bddf55 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Fri, 20 Feb 2009 10:58:45 -0800
Subject: [PATCH 03/14] News download: Handle HTML entities in article titles
---
src/calibre/web/feeds/__init__.py | 13 +++++++++++--
1 file changed, 11 insertions(+), 2 deletions(-)
diff --git a/src/calibre/web/feeds/__init__.py b/src/calibre/web/feeds/__init__.py
index 3bc1110db9..82e3f40c10 100644
--- a/src/calibre/web/feeds/__init__.py
+++ b/src/calibre/web/feeds/__init__.py
@@ -5,10 +5,11 @@ __copyright__ = '2008, Kovid Goyal '
'''
Contains the logic for parsing feeds.
'''
-import time, logging, traceback, copy
+import time, logging, traceback, copy, re
from datetime import datetime
from calibre.web.feeds.feedparser import parse
+from calibre import entity_to_unicode
from lxml import html
class Article(object):
@@ -19,6 +20,12 @@ class Article(object):
self.downloaded = False
self.id = id
self.title = title.strip() if title else title
+ try:
+ self.title = re.sub(r'&(\S+);',
+ entity_to_unicode, self.title)
+ print 11111, repr(self.title)
+ except:
+ pass
self.url = url
self.summary = summary
if summary and not isinstance(summary, unicode):
@@ -37,6 +44,7 @@ class Article(object):
self.date = published
self.utctime = datetime(*self.date[:6])
self.localtime = self.utctime + self.time_offset
+
def __repr__(self):
return \
@@ -91,7 +99,8 @@ class Feed(object):
if len(self.articles) >= max_articles_per_feed:
break
self.parse_article(item)
-
+
+
def populate_from_preparsed_feed(self, title, articles, oldest_article=7,
max_articles_per_feed=100):
self.title = title if title else _('Unknown feed')
From d221e7e448f245867479671f316520597cf86115 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Fri, 20 Feb 2009 11:42:28 -0800
Subject: [PATCH 04/14] New recipe for The Chicago Tribune by Kovid Goyal
---
src/calibre/web/feeds/__init__.py | 1 -
src/calibre/web/feeds/recipes/__init__.py | 2 +-
.../feeds/recipes/recipe_chicago_tribune.py | 82 +++++++++++++++++++
3 files changed, 83 insertions(+), 2 deletions(-)
create mode 100644 src/calibre/web/feeds/recipes/recipe_chicago_tribune.py
diff --git a/src/calibre/web/feeds/__init__.py b/src/calibre/web/feeds/__init__.py
index 82e3f40c10..3f0ec414a2 100644
--- a/src/calibre/web/feeds/__init__.py
+++ b/src/calibre/web/feeds/__init__.py
@@ -23,7 +23,6 @@ class Article(object):
try:
self.title = re.sub(r'&(\S+);',
entity_to_unicode, self.title)
- print 11111, repr(self.title)
except:
pass
self.url = url
diff --git a/src/calibre/web/feeds/recipes/__init__.py b/src/calibre/web/feeds/recipes/__init__.py
index 19c4f2827a..92fbbda555 100644
--- a/src/calibre/web/feeds/recipes/__init__.py
+++ b/src/calibre/web/feeds/recipes/__init__.py
@@ -30,7 +30,7 @@ recipe_modules = ['recipe_' + r for r in (
'honoluluadvertiser', 'starbulletin', 'exiled', 'indy_star', 'dna',
'pobjeda', 'chicago_breaking_news', 'glasgow_herald', 'linuxdevices',
'hindu', 'cincinnati_enquirer', 'physics_world', 'pressonline',
- 'la_republica', 'physics_today',
+ 'la_republica', 'physics_today', 'chicago_tribune',
)]
import re, imp, inspect, time, os
diff --git a/src/calibre/web/feeds/recipes/recipe_chicago_tribune.py b/src/calibre/web/feeds/recipes/recipe_chicago_tribune.py
new file mode 100644
index 0000000000..a4754dd751
--- /dev/null
+++ b/src/calibre/web/feeds/recipes/recipe_chicago_tribune.py
@@ -0,0 +1,82 @@
+from __future__ import with_statement
+__license__ = 'GPL 3'
+__copyright__ = '2009, Kovid Goyal '
+__docformat__ = 'restructuredtext en'
+
+import re
+from urlparse import urlparse, urlunparse
+from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ptempfile import PersistentTemporaryFile
+from threading import RLock
+
+class ChicagoTribune(BasicNewsRecipe):
+
+ title = 'Chicago Tribune'
+ __author__ = 'Kovid Goyal'
+ description = 'Politics, local and business news from Chicago'
+ language = _('English')
+ use_embedded_content = False
+ articles_are_obfuscated = True
+ remove_tags_before = dict(name='h1')
+ obfuctation_lock = RLock()
+
+ feeds = [
+ ('Latest news', 'http://feeds.chicagotribune.com/chicagotribune/news/'),
+ ('Local news', 'http://feeds.chicagotribune.com/chicagotribune/news/local/'),
+ ('Nation/world', 'http://feeds.chicagotribune.com/chicagotribune/news/nationworld/'),
+ ('Hot topics', 'http://feeds.chicagotribune.com/chicagotribune/hottopics/'),
+ ('Most E-mailed stories', 'http://feeds.chicagotribune.com/chicagotribune/email/'),
+ ('Opinion', 'http://feeds.chicagotribune.com/chicagotribune/opinion/'),
+ ('Off Topic', 'http://feeds.chicagotribune.com/chicagotribune/offtopic/'),
+ ('Politics', 'http://feeds.chicagotribune.com/chicagotribune/politics/'),
+ ('Special Reports', 'http://feeds.chicagotribune.com/chicagotribune/special/'),
+ ('Religion News', 'http://feeds.chicagotribune.com/chicagotribune/religion/'),
+ ('Business news', 'http://feeds.chicagotribune.com/chicagotribune/business/'),
+ ('Jobs and Careers', 'http://feeds.chicagotribune.com/chicagotribune/career/'),
+ ('Local scene', 'http://feeds.chicagotribune.com/chicagohomes/localscene/'),
+ ('Phil Rosenthal', 'http://feeds.chicagotribune.com/chicagotribune/rosenthal/'),
+ ('Tech Buzz', 'http://feeds.chicagotribune.com/chicagotribune/techbuzz/'),
+ ('Your Money', 'http://feeds.chicagotribune.com/chicagotribune/yourmoney/'),
+ ('Jon Hilkevitch - Getting around', 'http://feeds.chicagotribune.com/chicagotribune/gettingaround/'),
+ ('Jon Yates - What\'s your problem?', 'http://feeds.chicagotribune.com/chicagotribune/problem/'),
+ ('Garisson Keillor', 'http://feeds.chicagotribune.com/chicagotribune/keillor/'),
+ ('Marks Jarvis - On Money', 'http://feeds.chicagotribune.com/chicagotribune/marksjarvisonmoney/'),
+ ('Sports', 'http://feeds.chicagotribune.com/chicagotribune/sports/'),
+ ('Arts and Architecture', 'http://feeds.chicagotribune.com/chicagotribune/arts/'),
+ ('Books', 'http://feeds.chicagotribune.com/chicagotribune/books/'),
+ ('Magazine', 'http://feeds.chicagotribune.com/chicagotribune/magazine/'),
+ ('Movies', 'http://feeds.chicagotribune.com/chicagotribune/movies/'),
+ ('Music', 'http://feeds.chicagotribune.com/chicagotribune/movies/'),
+ ('TV', 'http://feeds.chicagotribune.com/chicagotribune/tv/'),
+ ('Hypertext', 'http://feeds.chicagotribune.com/chicagotribune/hypertext/'),
+ ('iPhone Blog', 'http://feeds.feedburner.com/redeye/iphoneblog'),
+ ('Julie\'s Health Club', 'http://feeds.chicagotribune.com/chicagotribune_julieshealthclub/'),
+ ]
+
+ temp_files = []
+
+ def get_article_url(self, article):
+ return article.get('feedburner_origlink', article.get('guid', article.get('link')))
+
+ def get_obfuscated_article(self, url, logger):
+ with self.obfuctation_lock:
+ soup = self.index_to_soup(url)
+ img = soup.find('img', alt='Print')
+ if img is not None:
+ a = img.parent.find('a', href=True)
+ purl = urlparse(url)
+ xurl = urlunparse(purl[:2] + (a['href'], '', '', ''))
+ soup = self.index_to_soup(xurl)
+ for img in soup.findAll('img', src=True):
+ if img['src'].startswith('/'):
+ img['src'] = urlunparse(purl[:2]+(img['src'], '', '', ''))
+ html = unicode(soup)
+ else:
+ h1 = soup.find(id='page-title')
+ body = soup.find(attrs={'class':re.compile('asset-content')})
+ html = u'%s'%(unicode(h1)+unicode(body))
+ self.temp_files.append(PersistentTemporaryFile('_chicago_tribune.xhtml'))
+ self.temp_files[-1].write(html.encode('utf-8'))
+ self.temp_files[-1].close()
+ return self.temp_files[-1].name
+
From e6473047fec2fde9a31f1bdb437d4459d0031726 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Fri, 20 Feb 2009 11:47:03 -0800
Subject: [PATCH 05/14] FB2 input:Fix regression that prevented metadata from
being read from input FB2 file when converting on the command line
---
src/calibre/ebooks/lrf/fb2/convert_from.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/src/calibre/ebooks/lrf/fb2/convert_from.py b/src/calibre/ebooks/lrf/fb2/convert_from.py
index dc4644d292..24562e708c 100644
--- a/src/calibre/ebooks/lrf/fb2/convert_from.py
+++ b/src/calibre/ebooks/lrf/fb2/convert_from.py
@@ -38,6 +38,7 @@ def extract_embedded_content(doc):
open(fname, 'wb').write(data)
def to_html(fb2file, tdir):
+ fb2file = os.path.abspath(fb2file)
cwd = os.getcwd()
try:
os.chdir(tdir)
@@ -52,7 +53,7 @@ def to_html(fb2file, tdir):
result = transform(doc)
open('index.html', 'wb').write(transform.tostring(result))
try:
- mi = get_metadata(open(fb2file, 'rb'))
+ mi = get_metadata(open(fb2file, 'rb'), 'fb2')
except:
mi = MetaInformation(None, None)
if not mi.title:
From 70b99872c670aeb11f2cfa11ed8ff82a79f129c2 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Fri, 20 Feb 2009 13:07:32 -0800
Subject: [PATCH 06/14] EPUB Output:Be less aggressive when stripping invalid
HTML constructs inserted by MS Word. Fixes regression in the Time recipe.
---
src/calibre/ebooks/html.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/src/calibre/ebooks/html.py b/src/calibre/ebooks/html.py
index 1c15973d3b..1f1e6b94b1 100644
--- a/src/calibre/ebooks/html.py
+++ b/src/calibre/ebooks/html.py
@@ -331,7 +331,8 @@ class PreProcessor(object):
# Convert all entities, since lxml doesn't handle them well
(re.compile(r'&(\S+?);'), convert_entities),
# Remove the ]*>'), lambda match: ''),
+ (re.compile(r'{0,1}!\[(end){0,1}if\]{0,1}>', re.IGNORECASE),
+ lambda match: ''),
]
# Fix pdftohtml markup
From c5bae5d403a4e9518372d5889212ca2813ace9e5 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Fri, 20 Feb 2009 14:40:16 -0800
Subject: [PATCH 07/14] Fix #1888 (Bulk Convert Problem)
---
src/calibre/ebooks/lrf/__init__.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/src/calibre/ebooks/lrf/__init__.py b/src/calibre/ebooks/lrf/__init__.py
index 2f9d724ce2..ae74e429ad 100644
--- a/src/calibre/ebooks/lrf/__init__.py
+++ b/src/calibre/ebooks/lrf/__init__.py
@@ -30,6 +30,7 @@ preferred_source_formats = [
'XHTML',
'PRC',
'AZW',
+ 'FB2',
'RTF',
'PDF',
'TXT',
From 7475c520ab5fba4b1598e093672e93dee0779463 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Fri, 20 Feb 2009 16:19:34 -0800
Subject: [PATCH 08/14] version 0.4.139
---
src/calibre/constants.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/calibre/constants.py b/src/calibre/constants.py
index a903e67e04..e6f9b771e9 100644
--- a/src/calibre/constants.py
+++ b/src/calibre/constants.py
@@ -2,7 +2,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = 'calibre'
-__version__ = '0.4.138'
+__version__ = '0.4.139'
__author__ = "Kovid Goyal "
'''
Various run time constants.
From 40c50665c5a7572278f3b00594792e3f63794d4c Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Fri, 20 Feb 2009 16:32:55 -0800
Subject: [PATCH 09/14] IGN:Tag release
From 544346837cfddd44c9c6f0647061864b8bd208d9 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sat, 21 Feb 2009 11:17:59 -0800
Subject: [PATCH 10/14] New recipes for E-Novine and Al Jazeera by Darko
Miletic
---
src/calibre/gui2/images/news/e_novine.png | Bin 0 -> 295 bytes
src/calibre/web/feeds/recipes/__init__.py | 3 +-
.../web/feeds/recipes/recipe_al_jazeera.py | 50 +++++++++++++++
.../web/feeds/recipes/recipe_e_novine.py | 58 ++++++++++++++++++
upload.py | 4 +-
5 files changed, 112 insertions(+), 3 deletions(-)
create mode 100644 src/calibre/gui2/images/news/e_novine.png
create mode 100644 src/calibre/web/feeds/recipes/recipe_al_jazeera.py
create mode 100644 src/calibre/web/feeds/recipes/recipe_e_novine.py
diff --git a/src/calibre/gui2/images/news/e_novine.png b/src/calibre/gui2/images/news/e_novine.png
new file mode 100644
index 0000000000000000000000000000000000000000..285e1e6a4dea0e28776141fd3c836b2ae5b3b1d6
GIT binary patch
literal 295
zcmeAS@N?(olHy`uVBq!ia0vp^0wB!61|;P_|4#%`Y)RhkE)4%caKYZ?lYt_f1s;*b
zK-vS0-A-oPfdtD69Mgd`SU*F|v9*VRoB&T3#}JFt$tev?W#88Q=TT{xZ+}`sQbIyv
zhR4Mg-VG0oB*h)dHMtGDK1lj)6#pRg;d8pAvZ8*Vy+9`?_phA|6aKdsO9(tJmuS$I
z*>H!0p~qC_?}}I6AP1|KxJHyD7o{ear0S*s2?iqr14~^4BV8lI5JO8V0~0Gl18oBX
wD+7bSi3>oE0c*(3PsvQH#L!@1Ze?U)Wnu!+V13Mk6R3f~)78&qol`;+04`uupa1{>
literal 0
HcmV?d00001
diff --git a/src/calibre/web/feeds/recipes/__init__.py b/src/calibre/web/feeds/recipes/__init__.py
index 92fbbda555..eed5670cac 100644
--- a/src/calibre/web/feeds/recipes/__init__.py
+++ b/src/calibre/web/feeds/recipes/__init__.py
@@ -30,7 +30,8 @@ recipe_modules = ['recipe_' + r for r in (
'honoluluadvertiser', 'starbulletin', 'exiled', 'indy_star', 'dna',
'pobjeda', 'chicago_breaking_news', 'glasgow_herald', 'linuxdevices',
'hindu', 'cincinnati_enquirer', 'physics_world', 'pressonline',
- 'la_republica', 'physics_today', 'chicago_tribune',
+ 'la_republica', 'physics_today', 'chicago_tribune', 'e_novine',
+ 'al_jazeera',
)]
import re, imp, inspect, time, os
diff --git a/src/calibre/web/feeds/recipes/recipe_al_jazeera.py b/src/calibre/web/feeds/recipes/recipe_al_jazeera.py
new file mode 100644
index 0000000000..9923f00392
--- /dev/null
+++ b/src/calibre/web/feeds/recipes/recipe_al_jazeera.py
@@ -0,0 +1,50 @@
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = '2009, Darko Miletic '
+
+'''
+aljazeera.net
+'''
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AlJazeera(BasicNewsRecipe):
+ title = 'Al Jazeera in English'
+ __author__ = 'Darko Miletic'
+ description = 'News from Middle East'
+ publisher = 'Al Jazeera'
+ category = 'news, politics, middle east'
+ simultaneous_downloads = 1
+ delay = 4
+ oldest_article = 1
+ max_articles_per_feed = 100
+ no_stylesheets = True
+ encoding = 'iso-8859-1'
+ remove_javascript = True
+ use_embedded_content = False
+
+ html2lrf_options = [
+ '--comment', description
+ , '--category', category
+ , '--publisher', publisher
+ , '--ignore-tables'
+ ]
+
+ html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_table=True'
+
+ keep_only_tags = [dict(name='div', attrs={'id':'ctl00_divContent'})]
+
+ remove_tags = [
+ dict(name=['object','link'])
+ ,dict(name='td', attrs={'class':['MostActiveDescHeader','MostActiveDescBody']})
+ ]
+
+ feeds = [(u'AL JAZEERA ENGLISH (AJE)', u'http://english.aljazeera.net/Services/Rss/?PostingId=2007731105943979989' )]
+
+ def preprocess_html(self, soup):
+ for item in soup.findAll(style=True):
+ del item['style']
+ for item in soup.findAll(face=True):
+ del item['face']
+ return soup
+
diff --git a/src/calibre/web/feeds/recipes/recipe_e_novine.py b/src/calibre/web/feeds/recipes/recipe_e_novine.py
new file mode 100644
index 0000000000..83654fe4c8
--- /dev/null
+++ b/src/calibre/web/feeds/recipes/recipe_e_novine.py
@@ -0,0 +1,58 @@
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = '2009, Darko Miletic '
+
+'''
+e-novine.com
+'''
+
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class E_novine(BasicNewsRecipe):
+ title = 'E-Novine'
+ __author__ = 'Darko Miletic'
+ description = 'News from Serbia'
+ publisher = 'E-novine'
+ category = 'news, politics, Balcans'
+ oldest_article = 1
+ max_articles_per_feed = 100
+ no_stylesheets = True
+ encoding = 'cp1250'
+ cover_url = 'http://www.e-novine.com/slike/slike_3/r1/g2008/m03/y3165525326702598.jpg'
+ remove_javascript = True
+ use_embedded_content = False
+ language = _('Serbian')
+ extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
+
+ html2lrf_options = [
+ '--comment', description
+ , '--category', category
+ , '--publisher', publisher
+ ]
+
+ html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0em; margin-top: 0em; margin-bottom: 0.5em} img {margin-top: 0em; margin-bottom: 0.4em}"'
+
+ preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
+
+ keep_only_tags = [dict(name='div', attrs={'id':['css_47_0_2844H']})]
+
+ remove_tags = [dict(name=['object','link','embed','iframe'])]
+
+ feeds = [(u'Sve vesti', u'http://www.e-novine.com/rss/e-novine.xml' )]
+
+ def preprocess_html(self, soup):
+ soup.html['xml:lang'] = 'sr-Latn-ME'
+ soup.html['lang'] = 'sr-Latn-ME'
+ mtag = ''
+ soup.head.insert(0,mtag)
+ for item in soup.findAll(style=True):
+ del item['style']
+ ftag = soup.find('div', attrs={'id':'css_47_0_2844H'})
+ if ftag:
+ it = ftag.div
+ it.extract()
+ ftag.div.extract()
+ ftag.insert(0,it)
+ return soup
diff --git a/upload.py b/upload.py
index 2aeb1461ee..cab426b191 100644
--- a/upload.py
+++ b/upload.py
@@ -676,7 +676,7 @@ class stage3(OptionlessCommand):
def run(self):
OptionlessCommand.run(self)
- self.misc()
+ self.misc()
class stage2(OptionlessCommand):
description = 'Stage 2 of the build process'
@@ -705,4 +705,4 @@ class upload(OptionlessCommand):
('stage1', None),
('stage2', None),
('stage3', None)
- ]
\ No newline at end of file
+ ]
From 2eee9da901dac31ca2259984014e61dc54b1c7b2 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sat, 21 Feb 2009 11:23:06 -0800
Subject: [PATCH 11/14] IGN:Fix #1906 (Minor fixes in spanish recipes)
---
src/calibre/web/feeds/recipes/recipe_infobae.py | 5 +++--
src/calibre/web/feeds/recipes/recipe_pagina12.py | 12 +++++-------
2 files changed, 8 insertions(+), 9 deletions(-)
diff --git a/src/calibre/web/feeds/recipes/recipe_infobae.py b/src/calibre/web/feeds/recipes/recipe_infobae.py
index 13c52ca6b1..b5c867b914 100644
--- a/src/calibre/web/feeds/recipes/recipe_infobae.py
+++ b/src/calibre/web/feeds/recipes/recipe_infobae.py
@@ -19,7 +19,7 @@ class Infobae(BasicNewsRecipe):
no_stylesheets = True
use_embedded_content = False
language = _('Spanish')
- encoding = 'iso-8859-1'
+ encoding = 'cp1252'
cover_url = 'http://www.infobae.com/imgs/header/header.gif'
remove_javascript = True
@@ -28,9 +28,10 @@ class Infobae(BasicNewsRecipe):
, '--category' , category
, '--publisher', publisher
, '--ignore-tables'
+ , '--ignore-colors'
]
- html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
+ html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
remove_tags = [
dict(name=['embed','link','object'])
diff --git a/src/calibre/web/feeds/recipes/recipe_pagina12.py b/src/calibre/web/feeds/recipes/recipe_pagina12.py
index b821ed0b68..f3ed1110fa 100644
--- a/src/calibre/web/feeds/recipes/recipe_pagina12.py
+++ b/src/calibre/web/feeds/recipes/recipe_pagina12.py
@@ -8,10 +8,8 @@ pagina12.com.ar
from calibre import strftime
-from calibre.web.feeds.news import BasicNewsRecipe
-
class Pagina12(BasicNewsRecipe):
- title = u'Pagina/12'
+ title = 'Pagina/12'
__author__ = 'Darko Miletic'
description = 'Noticias de Argentina y el resto del mundo'
publisher = 'La Pagina S.A.'
@@ -20,12 +18,14 @@ class Pagina12(BasicNewsRecipe):
max_articles_per_feed = 100
no_stylesheets = True
encoding = 'cp1252'
- cover_url = strftime('http://www.pagina12.com.ar/fotos/%Y%m%d/diario/TAPAN.jpg')
+ cover_url = strftime('http://www.pagina12.com.ar/fotos/%Y%m%d/diario/tapagn.jpg')
remove_javascript = True
use_embedded_content = False
+ language = _('Spanish')
+
html2lrf_options = [
- '--comment', description
+ '--comment', description
, '--category', category
, '--publisher', publisher
]
@@ -50,5 +50,3 @@ class Pagina12(BasicNewsRecipe):
for item in soup.findAll(style=True):
del item['style']
return soup
-
- language = _('Spanish')
\ No newline at end of file
From 0467af0c29d5571da100b92f682de4e7dfa82476 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sat, 21 Feb 2009 11:38:24 -0800
Subject: [PATCH 12/14] Cybook Gen3:Fix thumbnail generation.
---
src/calibre/devices/cybookg3/driver.py | 1 +
src/calibre/devices/cybookg3/t2b.py | 2 +-
2 files changed, 2 insertions(+), 1 deletion(-)
diff --git a/src/calibre/devices/cybookg3/driver.py b/src/calibre/devices/cybookg3/driver.py
index f092473675..eef32594eb 100644
--- a/src/calibre/devices/cybookg3/driver.py
+++ b/src/calibre/devices/cybookg3/driver.py
@@ -33,6 +33,7 @@ class CYBOOKG3(USBMS):
EBOOK_DIR_MAIN = "eBooks"
EBOOK_DIR_CARD = "eBooks"
+ THUMBNAIL_HEIGHT = 144
SUPPORTS_SUB_DIRS = True
def upload_books(self, files, names, on_card=False, end_session=True,
diff --git a/src/calibre/devices/cybookg3/t2b.py b/src/calibre/devices/cybookg3/t2b.py
index 5bf512f22d..7aaeeb63d7 100644
--- a/src/calibre/devices/cybookg3/t2b.py
+++ b/src/calibre/devices/cybookg3/t2b.py
@@ -30,7 +30,7 @@ def write_t2b(t2bfile, coverdata=None):
if coverdata != None:
coverdata = StringIO.StringIO(coverdata)
cover = Image.open(coverdata).convert("L")
- cover.thumbnail((96, 144))
+ cover.thumbnail((96, 144), Image.ANTIALIAS)
t2bcover = Image.new('L', (96, 144), 'white')
x, y = cover.size
From cfcccb044340f850a45dfa76e7a099d9c78bf2a0 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sat, 21 Feb 2009 13:12:52 -0800
Subject: [PATCH 13/14] IGN:...
---
src/calibre/web/feeds/recipes/recipe_pagina12.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/src/calibre/web/feeds/recipes/recipe_pagina12.py b/src/calibre/web/feeds/recipes/recipe_pagina12.py
index f3ed1110fa..039e8a8e15 100644
--- a/src/calibre/web/feeds/recipes/recipe_pagina12.py
+++ b/src/calibre/web/feeds/recipes/recipe_pagina12.py
@@ -7,6 +7,7 @@ pagina12.com.ar
'''
from calibre import strftime
+from calibre.web.feeds.news import BasicNewsRecipe
class Pagina12(BasicNewsRecipe):
title = 'Pagina/12'
From 6c8d6a4edb0c2b90537db744caf37f51c8badc6f Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sat, 21 Feb 2009 18:56:01 -0800
Subject: [PATCH 14/14] New recipe for Windows SuperSite by Hypernova
---
src/calibre/customize/builtins.py | 4 +--
src/calibre/web/feeds/recipes/__init__.py | 2 +-
.../web/feeds/recipes/recipe_winsupersite.py | 28 +++++++++++++++++++
3 files changed, 31 insertions(+), 3 deletions(-)
create mode 100644 src/calibre/web/feeds/recipes/recipe_winsupersite.py
diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py
index a40878480f..14d3c79062 100644
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@@ -233,7 +233,7 @@ class RTFMetadataWriter(MetadataWriterPlugin):
class MOBIMetadataWriter(MetadataWriterPlugin):
name = 'Set MOBI metadata'
- file_types = set(['mobi', 'prc'])
+ file_types = set(['mobi', 'prc', 'azw'])
description = _('Set metadata in %s files')%'MOBI'
author = 'Marshall T. Vandegrift'
@@ -246,4 +246,4 @@ plugins = [HTML2ZIP]
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
x.__name__.endswith('MetadataReader')]
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
- x.__name__.endswith('MetadataWriter')]
\ No newline at end of file
+ x.__name__.endswith('MetadataWriter')]
diff --git a/src/calibre/web/feeds/recipes/__init__.py b/src/calibre/web/feeds/recipes/__init__.py
index eed5670cac..579291a83b 100644
--- a/src/calibre/web/feeds/recipes/__init__.py
+++ b/src/calibre/web/feeds/recipes/__init__.py
@@ -31,7 +31,7 @@ recipe_modules = ['recipe_' + r for r in (
'pobjeda', 'chicago_breaking_news', 'glasgow_herald', 'linuxdevices',
'hindu', 'cincinnati_enquirer', 'physics_world', 'pressonline',
'la_republica', 'physics_today', 'chicago_tribune', 'e_novine',
- 'al_jazeera',
+ 'al_jazeera', 'winsupersite',
)]
import re, imp, inspect, time, os
diff --git a/src/calibre/web/feeds/recipes/recipe_winsupersite.py b/src/calibre/web/feeds/recipes/recipe_winsupersite.py
new file mode 100644
index 0000000000..fc6bc54da2
--- /dev/null
+++ b/src/calibre/web/feeds/recipes/recipe_winsupersite.py
@@ -0,0 +1,28 @@
+import re
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Winsupersite(BasicNewsRecipe):
+ title = u'Supersite for Windows'
+ description = u'Paul Thurrott SuperSite for Windows'
+ publisher = 'Paul Thurrott'
+ __author__ = 'Hypernova'
+ language = _('English')
+ oldest_article = 30
+ max_articles_per_feed = 100
+ no_stylesheets = True
+ use_embedded_content = False
+ remove_javascript = True
+ html2lrf_options = ['--ignore-tables']
+ html2epub_options = 'linearize_tables = True'
+ remove_tags_before = dict(name='h1')
+ preprocess_regexps = [
+ (re.compile(r'--Paul Thurrott.*', re.DOTALL|re.IGNORECASE),
+ lambda match: '