diff --git a/.bzrignore b/.bzrignore
index 6b6450f1f9..8711782023 100644
--- a/.bzrignore
+++ b/.bzrignore
@@ -40,6 +40,7 @@ recipes/.gitignore
recipes/README.md
recipes/icon_checker.py
recipes/readme_updater.py
+recipes/garfield.recipe
recipes/katalog_egazeciarz.recipe
recipes/tv_axnscifi.recipe
recipes/tv_comedycentral.recipe
@@ -63,6 +64,7 @@ recipes/tv_tvppolonia.recipe
recipes/tv_tvpuls.recipe
recipes/tv_viasathistory.recipe
recipes/icons/katalog_egazeciarz.png
+recipes/icons/garfield.png
recipes/icons/tv_axnscifi.png
recipes/icons/tv_comedycentral.png
recipes/icons/tv_discoveryscience.png
diff --git a/Changelog.yaml b/Changelog.yaml
index 129285a6b1..8fb8965e8d 100644
--- a/Changelog.yaml
+++ b/Changelog.yaml
@@ -1,3 +1,4 @@
+# vim:fileencoding=UTF-8:ts=2:sw=2:sta:et:sts=2:ai
# Each release can have new features and bug fixes. Each of which
# must have a title and can optionally have linked tickets and a description.
# In addition they can have a type field which defaults to minor, but should be major
@@ -19,6 +20,101 @@
# new recipes:
# - title:
+- version: 0.9.26
+ date: 2013-04-05
+
+ new features:
+ - title: "PDF Output: Allow using templates to create arbitrary headers and footers. Look under PDF Output in the conversion dialog for this feature."
+
+ - title: "ToC Editor: Allow generating the ToC directly from individual files inside the ebook. Useful for EPUBs that have individual chapters in single files."
+ tickets: [1163520]
+
+ - title: "ToC Editor: Add buttons to indent/unindent the current entry"
+
+ - title: "ToC Editor: Right-click menu to perform various useful actions on entries in the ToC"
+
+ - title: "Column icons: Allow use of wide images as column icons"
+
+ - title: "Add USB ids for the Palm Pre2 and Samsung Galaxy phone to the device drivers"
+ tickets: [1162293,1163115]
+
+ bug fixes:
+ - title: "PDF Output: Fix generating page numbers causing links to not work."
+ tickets: [1162573]
+
+ - title: "Wrong filename output in error message when 'Guide reference not found'"
+ tickets: [1163659]
+
+ - title: "Get Books: Update Amazon, Barnes & Noble, Waterstones and Gutenberg store plugins for website change"
+
+ - title: "PDF Output: Fix 1 pixel wide left and top margins on the cover page for some PDF conversions due to incorrect rounding."
+ tickets: [1162054]
+
+ - title: "ToC Editor: Fix drag and drop of multiple items resulting in the dropped items being in random order sometimes."
+ tickets: [1161999]
+
+ improved recipes:
+ - Financial Times UK
+ - Sing Tao Daily
+ - Apple Daily
+ - A List Apart
+ - Business Week
+ - Harpers printed edition
+ - Harvard Business Review
+
+ new recipes:
+ - title: AM730
+ author: Eddie Lau
+
+ - title: Arret sur images
+ author: Francois D
+
+ - title: Diario de Noticias
+ author: Jose Pinto
+
+- version: 0.9.25
+ date: 2013-03-29
+
+ new features:
+ - title: "Automatic adding: When checking for duplicates is enabled, use the same duplicates found dialog as is used during manual adding."
+ tickets: [1160914]
+
+ - title: "ToC Editor: Allow searching to find a location quickly when browsing through the book to select a location for a ToC item"
+
+ - title: "ToC Editor: Add a button to quickly flatten the entire table of contents"
+
+ - title: "Conversion: When converting a single book to EPUB or AZW3, add an option to automatically launch the Table of Contents editor after the conversion completes. Found under the Table of Contents section of the conversion dialog."
+
+ bug fixes:
+ - title: "calibredb: Nicer error messages when user provides invalid input"
+ tickets: [1160452,1160631]
+
+ - title: "News download: Always use the .jpg extension for jpeg images as apparently Moon+ Reader cannot handle .jpeg"
+
+ - title: "Fix Book Details popup keyboard navigation doesn't work on a Mac"
+ tickets: [1159610]
+
+ - title: "Fix a regression that caused the case of the book files to not be changed when changing the case of the title/author on case insensitive filesystems"
+
+ improved recipes:
+ - RTE news
+ - Various Polish news sources
+ - Psychology Today
+ - Foreign Affairs
+ - History Today
+ - Harpers Magazine (printed edition)
+ - Business Week Magazine
+ - The Hindu
+ - Irish Times
+ - Le Devoir
+
+ new recipes:
+ - title: Fortune Magazine
+ author: Rick Shang
+
+ - title: Eclipse Online
+ author: Jim DeVona
+
- version: 0.9.24
date: 2013-03-22
diff --git a/manual/conversion.rst b/manual/conversion.rst
index bf451d0980..14710f3f6a 100644
--- a/manual/conversion.rst
+++ b/manual/conversion.rst
@@ -750,8 +750,61 @@ If this property is detected by |app|, the following custom properties are recog
opf.series
opf.seriesindex
-In addition to this, you can specify the picture to use as the cover by naming it ``opf.cover`` (right click, Picture->Options->Name) in the ODT. If no picture with this name is found, the 'smart' method is used.
-As the cover detection might result in double covers in certain output formats, the process will remove the paragraph (only if the only content is the cover!) from the document. But this works only with the named picture!
+In addition to this, you can specify the picture to use as the cover by naming
+it ``opf.cover`` (right click, Picture->Options->Name) in the ODT. If no
+picture with this name is found, the 'smart' method is used. As the cover
+detection might result in double covers in certain output formats, the process
+will remove the paragraph (only if the only content is the cover!) from the
+document. But this works only with the named picture!
To disable cover detection you can set the custom property ``opf.nocover`` ('Yes or No' type) to Yes in advanced mode.
+Converting to PDF
+~~~~~~~~~~~~~~~~~~~
+
+The first, most important, setting to decide on when converting to PDF is the page
+size. By default, |app| uses a page size defined by the current
+:guilabel:`Output profile`. So if your output profile is set to Kindle, |app|
+will create a PDF with page size suitable for viewing on the small kindle
+screen. However, if you view this PDF file on a computer screen, then it will
+appear to have too large fonts. To create "normal" sized PDFs, use the override
+page size option under :guilabel:`PDF Output` in the conversion dialog.
+
+You can insert arbitrary headers and footers on each page of the PDF by
+specifying header and footer templates. Templates are just snippets of HTML
+code that get rendered in the header and footer locations. For example, to
+display page numbers centered at the bottom of every page, in green, use the following
+footer template::
+
+
+    <p style="text-align:center; color:green">Page _PAGENUM_</p>
+
+|app| will automatically replace _PAGENUM_ with the current page number. You
+can even put different content on even and odd pages, for example the following
+header template will show the title on odd pages and the author on even pages::
+
+ _AUTHOR__TITLE_
+
+|app| will automatically replace _TITLE_ and _AUTHOR_ with the title and author
+of the document being converted. You can also display text at the left and
+right edges and change the font size, as demonstrated with this header
+template::
+
+
+
+This will display the title at the left and the author at the right, in a font
+size smaller than the main text.
+
+Finally, you can also use the current section in templates, as shown below::
+
+ _SECTION_
+
+_SECTION_ is replaced by whatever the name of the current section is. These
+names are taken from the metadata Table of Contents in the document (the PDF
+Outline). If the document has no table of contents then it will be replaced by
+empty text. If a single PDF page has multiple sections, the first section on
+the page will be used.
+
+.. note:: When adding headers and footers make sure you set the page top and
+ bottom margins to large enough values, under the Page Setup section of the
+ conversion dialog.
+
diff --git a/manual/faq.rst b/manual/faq.rst
index 7ef0f20a95..a73a380029 100644
--- a/manual/faq.rst
+++ b/manual/faq.rst
@@ -129,11 +129,11 @@ tool that always produces valid EPUBs, |app| is not for you.
How do I use some of the advanced features of the conversion tools?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- You can get help on any individual feature of the converters by mousing over
- it in the GUI or running ``ebook-convert dummy.html .epub -h`` at a terminal.
- A good place to start is to look at the following demo files that demonstrate
- some of the advanced features:
- * `html-demo.zip `_
+You can get help on any individual feature of the converters by mousing over
+it in the GUI or running ``ebook-convert dummy.html .epub -h`` at a terminal.
+A good place to start is to look at the following demo file that demonstrates
+some of the advanced features:
+`html-demo.zip <http://calibre-ebook.com/downloads/demos/html-demo.zip>`_
Device Integration
@@ -647,12 +647,17 @@ computers. Run |app| on a single computer and access it via the Content Server
or a Remote Desktop solution.
If you must share the actual library, use a file syncing tool like
-DropBox or rsync or Microsoft SkyDrive instead of a networked drive. Even with
-these tools there is danger of data corruption/loss, so only do this if you are
-willing to live with that risk. In particular, be aware that **Google Drive**
-is incompatible with |app|, if you put your |app| library in Google Drive, you
-*will* suffer data loss. See
-`this thread `_ for details.
+DropBox or rsync or Microsoft SkyDrive instead of a networked drive. If you are
+using a file-syncing tool it is **essential** that you make sure that both
+|app| and the file syncing tool do not try to access the |app| library at the
+same time. In other words, **do not** run the file syncing tool and |app| at
+the same time.
+
+Even with these tools there is danger of data corruption/loss, so only do this
+if you are willing to live with that risk. In particular, be aware that
+**Google Drive** is incompatible with |app|, if you put your |app| library in
+Google Drive, **you will suffer data loss**. See `this thread
+`_ for details.
Content From The Web
---------------------
diff --git a/recipes/adventure_zone_pl.recipe b/recipes/adventure_zone_pl.recipe
index 00b4a8753e..50a980dc92 100644
--- a/recipes/adventure_zone_pl.recipe
+++ b/recipes/adventure_zone_pl.recipe
@@ -66,4 +66,3 @@ class Adventure_zone(BasicNewsRecipe):
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
a['href']=self.index + a['href']
return soup
-
diff --git a/recipes/am730.recipe b/recipes/am730.recipe
new file mode 100644
index 0000000000..0fac4bea51
--- /dev/null
+++ b/recipes/am730.recipe
@@ -0,0 +1,290 @@
+# vim:fileencoding=UTF-8
+from __future__ import unicode_literals
+__license__ = 'GPL v3'
+__copyright__ = '2013, Eddie Lau'
+__Date__ = ''
+__HiResImg__ = True
+
+'''
+Change Log:
+2013/03/30 -- first version
+'''
+
+from calibre import (__appname__, force_unicode, strftime)
+from calibre.utils.date import now as nowf
+import os, datetime, re
+from calibre.web.feeds.recipes import BasicNewsRecipe
+from contextlib import nested
+from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
+from calibre.ebooks.metadata.opf2 import OPFCreator
+from calibre.ebooks.metadata.toc import TOC
+from calibre.ebooks.metadata import MetaInformation
+from calibre.utils.localization import canonicalize_lang
+
+class AppleDaily(BasicNewsRecipe):
+ title = u'AM730'
+ __author__ = 'Eddie Lau'
+ publisher = 'AM730'
+ oldest_article = 1
+ max_articles_per_feed = 100
+ auto_cleanup = False
+ language = 'zh'
+ encoding = 'utf-8'
+ auto_cleanup = False
+ remove_javascript = True
+ use_embedded_content = False
+ no_stylesheets = True
+ description = 'http://www.am730.com.hk'
+ category = 'Chinese, News, Hong Kong'
+ masthead_url = 'http://www.am730.com.hk/images/logo.jpg'
+
+ extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} div[id=articleHeader] {font-size:200%; text-align:left; font-weight:bold;} photocaption {font-size:50%; margin-left:auto; margin-right:auto;}'
+ keep_only_tags = [dict(name='div', attrs={'id':'articleHeader'}),
+ dict(name='div', attrs={'class':'thecontent wordsnap'}),
+ dict(name='a', attrs={'class':'lightboximg'})]
+ remove_tags = [dict(name='img', attrs={'src':'/images/am730_article_logo.jpg'}),
+ dict(name='img', attrs={'src':'/images/am_endmark.gif'})]
+
+ def get_dtlocal(self):
+ dt_utc = datetime.datetime.utcnow()
+ # convert UTC to local hk time - at HKT 6am, all news are available
+ return dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(6.0/24)
+
+ def get_fetchdate(self):
+ if __Date__ <> '':
+ return __Date__
+ else:
+ return self.get_dtlocal().strftime("%Y%m%d")
+
+ def get_fetchformatteddate(self):
+ if __Date__ <> '':
+ return __Date__[0:4]+'-'+__Date__[4:6]+'-'+__Date__[6:8]
+ else:
+ return self.get_dtlocal().strftime("%Y-%m-%d")
+
+ def get_fetchyear(self):
+ if __Date__ <> '':
+ return __Date__[0:4]
+ else:
+ return self.get_dtlocal().strftime("%Y")
+
+ def get_fetchmonth(self):
+ if __Date__ <> '':
+ return __Date__[4:6]
+ else:
+ return self.get_dtlocal().strftime("%m")
+
+ def get_fetchday(self):
+ if __Date__ <> '':
+ return __Date__[6:8]
+ else:
+ return self.get_dtlocal().strftime("%d")
+
+ # Note: does not work with custom date given by __Date__
+ def get_weekday(self):
+ return self.get_dtlocal().weekday()
+
+ def populate_article_metadata(self, article, soup, first):
+ if first and hasattr(self, 'add_toc_thumbnail'):
+ picdiv = soup.find('img')
+ if picdiv is not None:
+ self.add_toc_thumbnail(article,picdiv['src'])
+
+ def parse_index(self):
+ feeds = []
+ soup = self.index_to_soup('http://www.am730.com.hk/')
+ ul = soup.find(attrs={'class':'nav-section'})
+ sectionList = []
+ for li in ul.findAll('li'):
+ a = 'http://www.am730.com.hk/' + li.find('a', href=True).get('href', False)
+ title = li.find('a').get('title', False).strip()
+ sectionList.append((title, a))
+ for title, url in sectionList:
+ articles = self.parse_section(url)
+ if articles:
+ feeds.append((title, articles))
+ return feeds
+
+ def parse_section(self, url):
+ soup = self.index_to_soup(url)
+ items = soup.findAll(attrs={'style':'padding-bottom: 15px;'})
+ current_articles = []
+ for item in items:
+ a = item.find(attrs={'class':'t6 f14'}).find('a', href=True)
+ articlelink = 'http://www.am730.com.hk/' + a.get('href', True)
+ title = self.tag_to_string(a)
+ description = self.tag_to_string(item.find(attrs={'class':'t3 f14'}))
+ current_articles.append({'title': title, 'url': articlelink, 'description': description})
+ return current_articles
+
+ def preprocess_html(self, soup):
+ multia = soup.findAll('a')
+ for a in multia:
+ if not (a == None):
+ image = a.find('img')
+ if not (image == None):
+ if __HiResImg__:
+ image['src'] = image.get('src').replace('/thumbs/', '/')
+ caption = image.get('alt')
+ tag = Tag(soup, "photo", [])
+ tag2 = Tag(soup, "photocaption", [])
+ tag.insert(0, image)
+ if not caption == None:
+ tag2.insert(0, caption)
+ tag.insert(1, tag2)
+ a.replaceWith(tag)
+ return soup
+
+ def create_opf(self, feeds, dir=None):
+ if dir is None:
+ dir = self.output_dir
+ title = self.short_title()
+ if self.output_profile.periodical_date_in_title:
+ title += strftime(self.timefmt)
+ mi = MetaInformation(title, [__appname__])
+ mi.publisher = __appname__
+ mi.author_sort = __appname__
+ if self.publication_type:
+ mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
+ mi.timestamp = nowf()
+ article_titles, aseen = [], set()
+ for f in feeds:
+ for a in f:
+ if a.title and a.title not in aseen:
+ aseen.add(a.title)
+ article_titles.append(force_unicode(a.title, 'utf-8'))
+
+ mi.comments = self.description
+ if not isinstance(mi.comments, unicode):
+ mi.comments = mi.comments.decode('utf-8', 'replace')
+ mi.comments += ('\n\n' + _('Articles in this issue: ') + '\n' +
+ '\n\n'.join(article_titles))
+
+ language = canonicalize_lang(self.language)
+ if language is not None:
+ mi.language = language
+ # This one affects the pub date shown in kindle title
+ #mi.pubdate = nowf()
+ # now appears to need the time field to be > 12.00noon as well
+ mi.pubdate = datetime.datetime(int(self.get_fetchyear()), int(self.get_fetchmonth()), int(self.get_fetchday()), 12, 30, 0)
+ opf_path = os.path.join(dir, 'index.opf')
+ ncx_path = os.path.join(dir, 'index.ncx')
+
+ opf = OPFCreator(dir, mi)
+ # Add mastheadImage entry to section
+ mp = getattr(self, 'masthead_path', None)
+ if mp is not None and os.access(mp, os.R_OK):
+ from calibre.ebooks.metadata.opf2 import Guide
+ ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
+ ref.type = 'masthead'
+ ref.title = 'Masthead Image'
+ opf.guide.append(ref)
+
+ manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
+ manifest.append(os.path.join(dir, 'index.html'))
+ manifest.append(os.path.join(dir, 'index.ncx'))
+
+ # Get cover
+ cpath = getattr(self, 'cover_path', None)
+ if cpath is None:
+ pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
+ if self.default_cover(pf):
+ cpath = pf.name
+ if cpath is not None and os.access(cpath, os.R_OK):
+ opf.cover = cpath
+ manifest.append(cpath)
+
+ # Get masthead
+ mpath = getattr(self, 'masthead_path', None)
+ if mpath is not None and os.access(mpath, os.R_OK):
+ manifest.append(mpath)
+
+ opf.create_manifest_from_files_in(manifest)
+ for mani in opf.manifest:
+ if mani.path.endswith('.ncx'):
+ mani.id = 'ncx'
+ if mani.path.endswith('mastheadImage.jpg'):
+ mani.id = 'masthead-image'
+
+ entries = ['index.html']
+ toc = TOC(base_path=dir)
+ self.play_order_counter = 0
+ self.play_order_map = {}
+
+
+ def feed_index(num, parent):
+ f = feeds[num]
+ for j, a in enumerate(f):
+ if getattr(a, 'downloaded', False):
+ adir = 'feed_%d/article_%d/'%(num, j)
+ auth = a.author
+ if not auth:
+ auth = None
+ desc = a.text_summary
+ if not desc:
+ desc = None
+ else:
+ desc = self.description_limiter(desc)
+ tt = a.toc_thumbnail if a.toc_thumbnail else None
+ entries.append('%sindex.html'%adir)
+ po = self.play_order_map.get(entries[-1], None)
+ if po is None:
+ self.play_order_counter += 1
+ po = self.play_order_counter
+ parent.add_item('%sindex.html'%adir, None,
+ a.title if a.title else _('Untitled Article'),
+ play_order=po, author=auth,
+ description=desc, toc_thumbnail=tt)
+ last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
+ for sp in a.sub_pages:
+ prefix = os.path.commonprefix([opf_path, sp])
+ relp = sp[len(prefix):]
+ entries.append(relp.replace(os.sep, '/'))
+ last = sp
+
+ if os.path.exists(last):
+ with open(last, 'rb') as fi:
+ src = fi.read().decode('utf-8')
+ soup = BeautifulSoup(src)
+ body = soup.find('body')
+ if body is not None:
+ prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last))))
+ templ = self.navbar.generate(True, num, j, len(f),
+ not self.has_single_feed,
+ a.orig_url, __appname__, prefix=prefix,
+ center=self.center_navbar)
+ elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
+ body.insert(len(body.contents), elem)
+ with open(last, 'wb') as fi:
+ fi.write(unicode(soup).encode('utf-8'))
+ if len(feeds) == 0:
+ raise Exception('All feeds are empty, aborting.')
+
+ if len(feeds) > 1:
+ for i, f in enumerate(feeds):
+ entries.append('feed_%d/index.html'%i)
+ po = self.play_order_map.get(entries[-1], None)
+ if po is None:
+ self.play_order_counter += 1
+ po = self.play_order_counter
+ auth = getattr(f, 'author', None)
+ if not auth:
+ auth = None
+ desc = getattr(f, 'description', None)
+ if not desc:
+ desc = None
+ feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
+ f.title, play_order=po, description=desc, author=auth))
+
+ else:
+ entries.append('feed_%d/index.html'%0)
+ feed_index(0, toc)
+
+ for i, p in enumerate(entries):
+ entries[i] = os.path.join(dir, p.replace('/', os.sep))
+ opf.create_spine(entries)
+ opf.set_toc(toc)
+
+ with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
+ opf.render(opf_file, ncx_file)
+
diff --git a/recipes/apple_daily.recipe b/recipes/apple_daily.recipe
index 763136c9b0..522427ed6a 100644
--- a/recipes/apple_daily.recipe
+++ b/recipes/apple_daily.recipe
@@ -1,161 +1,275 @@
-# -*- coding: utf-8 -*-
-import re
+# vim:fileencoding=UTF-8
+from __future__ import unicode_literals
+__license__ = 'GPL v3'
+__copyright__ = '2013, Eddie Lau'
+__Date__ = ''
+
+from calibre import (__appname__, force_unicode, strftime)
+from calibre.utils.date import now as nowf
+import os, datetime, re
from calibre.web.feeds.recipes import BasicNewsRecipe
+from contextlib import nested
+from calibre.ebooks.BeautifulSoup import BeautifulSoup
+from calibre.ebooks.metadata.opf2 import OPFCreator
+from calibre.ebooks.metadata.toc import TOC
+from calibre.ebooks.metadata import MetaInformation
+from calibre.utils.localization import canonicalize_lang
class AppleDaily(BasicNewsRecipe):
-
- title = u'蘋果日報'
- __author__ = u'蘋果日報'
- __publisher__ = u'蘋果日報'
- description = u'蘋果日報'
- masthead_url = 'http://hk.apple.nextmedia.com/template/common/header/2009/images/atnextheader_logo_appledaily.gif'
- language = 'zh_TW'
- encoding = 'UTF-8'
- timefmt = ' [%a, %d %b, %Y]'
- needs_subscription = False
+ title = u'蘋果日報 (香港)'
+ __author__ = 'Eddie Lau'
+ publisher = '蘋果日報'
+ oldest_article = 1
+ max_articles_per_feed = 100
+ auto_cleanup = False
+ language = 'zh'
+ encoding = 'utf-8'
+ auto_cleanup = False
remove_javascript = True
- remove_tags_before = dict(name=['ul', 'h1'])
- remove_tags_after = dict(name='form')
- remove_tags = [dict(attrs={'class':['articleTools', 'post-tools', 'side_tool', 'nextArticleLink clearfix']}),
- dict(id=['footer', 'toolsRight', 'articleInline', 'navigation', 'archive', 'side_search', 'blog_sidebar', 'side_tool', 'side_index']),
- dict(name=['script', 'noscript', 'style', 'form'])]
+ use_embedded_content = False
no_stylesheets = True
- extra_css = '''
- @font-face {font-family: "uming", serif, sans-serif; src: url(res:///usr/share/fonts/truetype/arphic/uming.ttc); }\n
- body {margin-right: 8pt; font-family: 'uming', serif;}
- h1 {font-family: 'uming', serif, sans-serif}
- '''
- #extra_css = 'h1 {font: sans-serif large;}\n.byline {font:monospace;}'
+ description = 'http://hkm.appledaily.com/'
+ category = 'Chinese, News, Hong Kong'
+ masthead_url = 'http://upload.wikimedia.org/wikipedia/zh/c/cf/AppleDailyLogo1.png'
- preprocess_regexps = [
- (re.compile(r'img.php?server=(?P[^&]+)&path=(?P[^&]+).*', re.DOTALL|re.IGNORECASE),
- lambda match: 'http://' + match.group('server') + '/' + match.group('path')),
- ]
+ extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} h1 {font-size:200%; text-align:left; font-weight:bold;} p[class=video-caption] {font-size:50%; margin-left:auto; margin-right:auto;}'
+ keep_only_tags = [dict(name='div', attrs={'id':'content-article'})]
+ remove_tags = [dict(name='div', attrs={'class':'prev-next-btn'}),
+ dict(name='p', attrs={'class':'next'})]
+
+ def get_dtlocal(self):
+ dt_utc = datetime.datetime.utcnow()
+ # convert UTC to local hk time - at HKT 6am, all news are available
+ return dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(6.0/24)
+
+ def get_fetchdate(self):
+ if __Date__ <> '':
+ return __Date__
+ else:
+ return self.get_dtlocal().strftime("%Y%m%d")
+
+ def get_fetchformatteddate(self):
+ if __Date__ <> '':
+ return __Date__[0:4]+'-'+__Date__[4:6]+'-'+__Date__[6:8]
+ else:
+ return self.get_dtlocal().strftime("%Y-%m-%d")
+
+ def get_fetchyear(self):
+ if __Date__ <> '':
+ return __Date__[0:4]
+ else:
+ return self.get_dtlocal().strftime("%Y")
+
+ def get_fetchmonth(self):
+ if __Date__ <> '':
+ return __Date__[4:6]
+ else:
+ return self.get_dtlocal().strftime("%m")
+
+ def get_fetchday(self):
+ if __Date__ <> '':
+ return __Date__[6:8]
+ else:
+ return self.get_dtlocal().strftime("%d")
+
+ # Note: does not work with custom date given by __Date__
+ def get_weekday(self):
+ return self.get_dtlocal().weekday()
def get_cover_url(self):
- return 'http://hk.apple.nextmedia.com/template/common/header/2009/images/atnextheader_logo_appledaily.gif'
-
-
- #def get_browser(self):
- #br = BasicNewsRecipe.get_browser(self)
- #if self.username is not None and self.password is not None:
- # br.open('http://www.nytimes.com/auth/login')
- # br.select_form(name='login')
- # br['USERID'] = self.username
- # br['PASSWORD'] = self.password
- # br.submit()
- #return br
-
- def preprocess_html(self, soup):
- #process all the images
- for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
- iurl = tag['src']
- #print 'checking image: ' + iurl
-
- #img\.php?server\=(?P[^&]+)&path=(?P[^&]+)
- p = re.compile(r'img\.php\?server=(?P[^&]+)&path=(?P[^&]+)', re.DOTALL|re.IGNORECASE)
-
- m = p.search(iurl)
-
- if m is not None:
- iurl = 'http://' + m.group('server') + '/' + m.group('path')
- #print 'working! new url: ' + iurl
- tag['src'] = iurl
- #else:
- #print 'not good'
-
- for tag in soup.findAll(lambda tag: tag.name.lower()=='a' and tag.has_key('href')):
- iurl = tag['href']
- #print 'checking image: ' + iurl
-
- #img\.php?server\=(?P[^&]+)&path=(?P[^&]+)
- p = re.compile(r'img\.php\?server=(?P[^&]+)&path=(?P[^&]+)', re.DOTALL|re.IGNORECASE)
-
- m = p.search(iurl)
-
- if m is not None:
- iurl = 'http://' + m.group('server') + '/' + m.group('path')
- #print 'working! new url: ' + iurl
- tag['href'] = iurl
- #else:
- #print 'not good'
-
- return soup
+ soup = self.index_to_soup('http://hkm.appledaily.com/')
+ cover = soup.find(attrs={'class':'top-news'}).get('src', False)
+ br = BasicNewsRecipe.get_browser(self)
+ try:
+ br.open(cover)
+ except:
+ cover = None
+ return cover
+ def populate_article_metadata(self, article, soup, first):
+ if first and hasattr(self, 'add_toc_thumbnail'):
+ picdiv = soup.find('img')
+ if picdiv is not None:
+ self.add_toc_thumbnail(article,picdiv['src'])
def parse_index(self):
- base = 'http://news.hotpot.hk/fruit'
- soup = self.index_to_soup('http://news.hotpot.hk/fruit/index.php')
+ feeds = []
+ soup = self.index_to_soup('http://hkm.appledaily.com/')
+ ul = soup.find(attrs={'class':'menu'})
+ sectionList = []
+ for li in ul.findAll('li'):
+ a = 'http://hkm.appledaily.com/' + li.find('a', href=True).get('href', False)
+ title = li.find('a', text=True).strip()
+ if not title == u'動新聞':
+ sectionList.append((title, a))
+ for title, url in sectionList:
+ articles = self.parse_section(url)
+ if articles:
+ feeds.append((title, articles))
+ return feeds
- #def feed_title(div):
- # return ''.join(div.findAll(text=True, recursive=False)).strip()
+ def parse_section(self, url):
+ soup = self.index_to_soup(url)
+ ul = soup.find(attrs={'class':'list'})
+ current_articles = []
+ for li in ul.findAll('li'):
+ a = li.find('a', href=True)
+ title = li.find('p', text=True).strip()
+ if a is not None:
+ current_articles.append({'title': title, 'url':'http://hkm.appledaily.com/' + a.get('href', False)})
+ pass
+ return current_articles
- articles = {}
- key = None
- ans = []
- for div in soup.findAll('li'):
- key = div.find(text=True, recursive=True);
- #if key == u'豪情':
- # continue;
+ def create_opf(self, feeds, dir=None):
+ if dir is None:
+ dir = self.output_dir
+ title = self.short_title()
+ if self.output_profile.periodical_date_in_title:
+ title += strftime(self.timefmt)
+ mi = MetaInformation(title, [__appname__])
+ mi.publisher = __appname__
+ mi.author_sort = __appname__
+ if self.publication_type:
+ mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
+ mi.timestamp = nowf()
+ article_titles, aseen = [], set()
+ for f in feeds:
+ for a in f:
+ if a.title and a.title not in aseen:
+ aseen.add(a.title)
+ article_titles.append(force_unicode(a.title, 'utf-8'))
- print 'section=' + key
+ mi.comments = self.description
+ if not isinstance(mi.comments, unicode):
+ mi.comments = mi.comments.decode('utf-8', 'replace')
+ mi.comments += ('\n\n' + _('Articles in this issue: ') + '\n' +
+ '\n\n'.join(article_titles))
- articles[key] = []
+ language = canonicalize_lang(self.language)
+ if language is not None:
+ mi.language = language
+ # This one affects the pub date shown in kindle title
+ #mi.pubdate = nowf()
+ # now appears to need the time field to be > 12.00noon as well
+ mi.pubdate = datetime.datetime(int(self.get_fetchyear()), int(self.get_fetchmonth()), int(self.get_fetchday()), 12, 30, 0)
+ opf_path = os.path.join(dir, 'index.opf')
+ ncx_path = os.path.join(dir, 'index.ncx')
- ans.append(key)
+ opf = OPFCreator(dir, mi)
+ # Add mastheadImage entry to section
+ mp = getattr(self, 'masthead_path', None)
+ if mp is not None and os.access(mp, os.R_OK):
+ from calibre.ebooks.metadata.opf2 import Guide
+ ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
+ ref.type = 'masthead'
+ ref.title = 'Masthead Image'
+ opf.guide.append(ref)
- a = div.find('a', href=True)
+ manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
+ manifest.append(os.path.join(dir, 'index.html'))
+ manifest.append(os.path.join(dir, 'index.ncx'))
- if not a:
- continue
+ # Get cover
+ cpath = getattr(self, 'cover_path', None)
+ if cpath is None:
+ pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
+ if self.default_cover(pf):
+ cpath = pf.name
+ if cpath is not None and os.access(cpath, os.R_OK):
+ opf.cover = cpath
+ manifest.append(cpath)
- url = base + '/' + a['href']
- print 'url=' + url
+ # Get masthead
+ mpath = getattr(self, 'masthead_path', None)
+ if mpath is not None and os.access(mpath, os.R_OK):
+ manifest.append(mpath)
- if not articles.has_key(key):
- articles[key] = []
- else:
- # sub page
- subSoup = self.index_to_soup(url)
+ opf.create_manifest_from_files_in(manifest)
+ for mani in opf.manifest:
+ if mani.path.endswith('.ncx'):
+ mani.id = 'ncx'
+ if mani.path.endswith('mastheadImage.jpg'):
+ mani.id = 'masthead-image'
- for subDiv in subSoup.findAll('li'):
- subA = subDiv.find('a', href=True)
- subTitle = subDiv.find(text=True, recursive=True)
- subUrl = base + '/' + subA['href']
-
- print 'subUrl' + subUrl
-
- articles[key].append(
- dict(title=subTitle,
- url=subUrl,
- date='',
- description='',
- content=''))
+ entries = ['index.html']
+ toc = TOC(base_path=dir)
+ self.play_order_counter = 0
+ self.play_order_map = {}
-# elif div['class'] in ['story', 'story headline']:
-# a = div.find('a', href=True)
-# if not a:
-# continue
-# url = re.sub(r'\?.*', '', a['href'])
-# url += '?pagewanted=all'
-# title = self.tag_to_string(a, use_alt=True).strip()
-# description = ''
-# pubdate = strftime('%a, %d %b')
-# summary = div.find(True, attrs={'class':'summary'})
-# if summary:
-# description = self.tag_to_string(summary, use_alt=False)
-#
-# feed = key if key is not None else 'Uncategorized'
-# if not articles.has_key(feed):
-# articles[feed] = []
-# if not 'podcasts' in url:
-# articles[feed].append(
-# dict(title=title, url=url, date=pubdate,
-# description=description,
-# content=''))
-# ans = self.sort_index_by(ans, {'The Front Page':-1, 'Dining In, Dining Out':1, 'Obituaries':2})
- ans = [(unicode(key), articles[key]) for key in ans if articles.has_key(key)]
- return ans
+ def feed_index(num, parent):
+ f = feeds[num]
+ for j, a in enumerate(f):
+ if getattr(a, 'downloaded', False):
+ adir = 'feed_%d/article_%d/'%(num, j)
+ auth = a.author
+ if not auth:
+ auth = None
+ desc = a.text_summary
+ if not desc:
+ desc = None
+ else:
+ desc = self.description_limiter(desc)
+ tt = a.toc_thumbnail if a.toc_thumbnail else None
+ entries.append('%sindex.html'%adir)
+ po = self.play_order_map.get(entries[-1], None)
+ if po is None:
+ self.play_order_counter += 1
+ po = self.play_order_counter
+ parent.add_item('%sindex.html'%adir, None,
+ a.title if a.title else _('Untitled Article'),
+ play_order=po, author=auth,
+ description=desc, toc_thumbnail=tt)
+ last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
+ for sp in a.sub_pages:
+ prefix = os.path.commonprefix([opf_path, sp])
+ relp = sp[len(prefix):]
+ entries.append(relp.replace(os.sep, '/'))
+ last = sp
+ if os.path.exists(last):
+ with open(last, 'rb') as fi:
+ src = fi.read().decode('utf-8')
+ soup = BeautifulSoup(src)
+ body = soup.find('body')
+ if body is not None:
+ prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last))))
+ templ = self.navbar.generate(True, num, j, len(f),
+ not self.has_single_feed,
+ a.orig_url, __appname__, prefix=prefix,
+ center=self.center_navbar)
+ elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
+ body.insert(len(body.contents), elem)
+ with open(last, 'wb') as fi:
+ fi.write(unicode(soup).encode('utf-8'))
+ if len(feeds) == 0:
+ raise Exception('All feeds are empty, aborting.')
+
+ if len(feeds) > 1:
+ for i, f in enumerate(feeds):
+ entries.append('feed_%d/index.html'%i)
+ po = self.play_order_map.get(entries[-1], None)
+ if po is None:
+ self.play_order_counter += 1
+ po = self.play_order_counter
+ auth = getattr(f, 'author', None)
+ if not auth:
+ auth = None
+ desc = getattr(f, 'description', None)
+ if not desc:
+ desc = None
+ feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
+ f.title, play_order=po, description=desc, author=auth))
+
+ else:
+ entries.append('feed_%d/index.html'%0)
+ feed_index(0, toc)
+
+ for i, p in enumerate(entries):
+ entries[i] = os.path.join(dir, p.replace('/', os.sep))
+ opf.create_spine(entries)
+ opf.set_toc(toc)
+
+ with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
+ opf.render(opf_file, ncx_file)
diff --git a/recipes/arret_sur_images.recipe b/recipes/arret_sur_images.recipe
new file mode 100644
index 0000000000..fac2983231
--- /dev/null
+++ b/recipes/arret_sur_images.recipe
@@ -0,0 +1,54 @@
+from __future__ import unicode_literals
+
+__license__ = 'WTFPL'
+__author__ = '2013, François D. '
+__description__ = 'Get some fresh news from Arrêt sur images'
+
+
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+class Asi(BasicNewsRecipe):
+
+ title = 'Arrêt sur images'
+ __author__ = 'François D. (aka franek)'
+ description = 'Global news in french from news site "Arrêt sur images"'
+
+ oldest_article = 7.0
+ language = 'fr'
+ needs_subscription = True
+ max_articles_per_feed = 100
+
+ simultaneous_downloads = 1
+ timefmt = '[%a, %d %b %Y %I:%M +0200]'
+ cover_url = 'http://www.arretsurimages.net/images/header/menu/menu_1.png'
+
+ use_embedded_content = False
+ no_stylesheets = True
+ remove_javascript = True
+
+ feeds = [
+ ('vite dit et gratuit', 'http://www.arretsurimages.net/vite-dit.rss'),
+ ('Toutes les chroniques', 'http://www.arretsurimages.net/chroniques.rss'),
+ ('Contenus et dossiers', 'http://www.arretsurimages.net/dossiers.rss'),
+ ]
+
+ conversion_options = { 'smarten_punctuation' : True }
+
+ remove_tags = [dict(id='vite-titre'), dict(id='header'), dict(id='wrap-connexion'), dict(id='col_right'), dict(name='div', attrs={'class':'bloc-chroniqueur-2'}), dict(id='footercontainer')]
+
+ def print_version(self, url):
+ return url.replace('contenu.php', 'contenu-imprimable.php')
+
+ def get_browser(self):
+ # Need to use robust HTML parser
+ br = BasicNewsRecipe.get_browser(self, use_robust_parser=True)
+ if self.username is not None and self.password is not None:
+ br.open('http://www.arretsurimages.net/index.php')
+ br.select_form(nr=0)
+ br.form.set_all_readonly(False)
+ br['redir'] = 'forum/login.php'
+ br['username'] = self.username
+ br['password'] = self.password
+ br.submit()
+ return br
+
diff --git a/recipes/astroflesz.recipe b/recipes/astroflesz.recipe
index 11a56ec6b5..676aedfd3a 100644
--- a/recipes/astroflesz.recipe
+++ b/recipes/astroflesz.recipe
@@ -2,12 +2,12 @@
from calibre.web.feeds.news import BasicNewsRecipe
class Astroflesz(BasicNewsRecipe):
- title = u'Astroflesz'
+ title = u'Astroflesz'
oldest_article = 7
- __author__ = 'fenuks'
- description = u'astroflesz.pl - to portal poświęcony astronomii. Informuje zarówno o aktualnych wydarzeniach i odkryciach naukowych, jak również zapowiada ciekawe zjawiska astronomiczne'
- category = 'astronomy'
- language = 'pl'
+ __author__ = 'fenuks'
+ description = u'astroflesz.pl - to portal poświęcony astronomii. Informuje zarówno o aktualnych wydarzeniach i odkryciach naukowych, jak również zapowiada ciekawe zjawiska astronomiczne'
+ category = 'astronomy'
+ language = 'pl'
cover_url = 'http://www.astroflesz.pl/templates/astroflesz/images/logo/logo.png'
ignore_duplicate_articles = {'title', 'url'}
max_articles_per_feed = 100
@@ -17,7 +17,7 @@ class Astroflesz(BasicNewsRecipe):
keep_only_tags = [dict(id="k2Container")]
remove_tags_after = dict(name='div', attrs={'class':'itemLinks'})
remove_tags = [dict(name='div', attrs={'class':['itemLinks', 'itemToolbar', 'itemRatingBlock']})]
- feeds = [(u'Wszystkie', u'http://astroflesz.pl/?format=feed')]
+ feeds = [(u'Wszystkie', u'http://astroflesz.pl/?format=feed')]
def postprocess_html(self, soup, first_fetch):
t = soup.find(attrs={'class':'itemIntroText'})
diff --git a/recipes/badania_net.recipe b/recipes/badania_net.recipe
index 01499f6369..c47e9b6f54 100644
--- a/recipes/badania_net.recipe
+++ b/recipes/badania_net.recipe
@@ -1,17 +1,20 @@
from calibre.web.feeds.news import BasicNewsRecipe
+import re
class BadaniaNet(BasicNewsRecipe):
- title = u'badania.net'
+ title = u'badania.net'
__author__ = 'fenuks'
- description = u'chcesz wiedzieć więcej?'
- category = 'science'
- language = 'pl'
+ description = u'chcesz wiedzieć więcej?'
+ category = 'science'
+ language = 'pl'
cover_url = 'http://badania.net/wp-content/badanianet_green_transparent.png'
+ extra_css = '.alignleft {float:left; margin-right:5px;} .alignright {float:right; margin-left:5px;}'
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True
+ preprocess_regexps = [(re.compile(r"Tekst sponsoruje
", re.IGNORECASE), lambda m: ''),]
remove_empty_feeds = True
use_embedded_content = False
remove_tags = [dict(attrs={'class':['omc-flex-category', 'omc-comment-count', 'omc-single-tags']})]
remove_tags_after = dict(attrs={'class':'omc-single-tags'})
keep_only_tags = [dict(id='omc-full-article')]
- feeds = [(u'Psychologia', u'http://badania.net/category/psychologia/feed/'), (u'Technologie', u'http://badania.net/category/technologie/feed/'), (u'Biologia', u'http://badania.net/category/biologia/feed/'), (u'Chemia', u'http://badania.net/category/chemia/feed/'), (u'Zdrowie', u'http://badania.net/category/zdrowie/'), (u'Seks', u'http://badania.net/category/psychologia-ewolucyjna-tematyka-seks/feed/')]
+ feeds = [(u'Psychologia', u'http://badania.net/category/psychologia/feed/'), (u'Technologie', u'http://badania.net/category/technologie/feed/'), (u'Biologia', u'http://badania.net/category/biologia/feed/'), (u'Chemia', u'http://badania.net/category/chemia/feed/'), (u'Zdrowie', u'http://badania.net/category/zdrowie/'), (u'Seks', u'http://badania.net/category/psychologia-ewolucyjna-tematyka-seks/feed/')]
\ No newline at end of file
diff --git a/recipes/birmingham_post.recipe b/recipes/birmingham_post.recipe
index db2e29c821..86db7504fe 100644
--- a/recipes/birmingham_post.recipe
+++ b/recipes/birmingham_post.recipe
@@ -9,14 +9,14 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
__author__ = 'Dave Asbury'
cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/161987_9010212100_2035706408_n.jpg'
oldest_article = 2
- max_articles_per_feed = 12
+ max_articles_per_feed = 20
linearize_tables = True
remove_empty_feeds = True
remove_javascript = True
no_stylesheets = True
auto_cleanup = True
language = 'en_GB'
-
+ compress_news_images = True
cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/161987_9010212100_2035706408_n.jpg'
masthead_url = 'http://www.trinitymirror.com/images/birminghampost-logo.gif'
diff --git a/recipes/bwmagazine.recipe b/recipes/bwmagazine.recipe
index d11861ce08..ae3197da81 100644
--- a/recipes/bwmagazine.recipe
+++ b/recipes/bwmagazine.recipe
@@ -37,68 +37,15 @@ class BusinessWeek(BasicNewsRecipe):
, 'language' : language
}
- #remove_tags = [
- #dict(attrs={'class':'inStory'})
- #,dict(name=['meta','link','iframe','base','embed','object','table','th','tr','td'])
- #,dict(attrs={'id':['inset','videoDisplay']})
- #]
- #keep_only_tags = [dict(name='div', attrs={'id':['story-body','storyBody']})]
- remove_attributes = ['lang']
- match_regexps = [r'http://www.businessweek.com/.*_page_[1-9].*']
-
feeds = [
- (u'Top Stories', u'http://www.businessweek.com/topStories/rss/topStories.rss'),
- (u'Top News' , u'http://www.businessweek.com/rss/bwdaily.rss' ),
- (u'Asia', u'http://www.businessweek.com/rss/asia.rss'),
- (u'Autos', u'http://www.businessweek.com/rss/autos/index.rss'),
- (u'Classic Cars', u'http://rss.businessweek.com/bw_rss/classiccars'),
- (u'Hybrids', u'http://rss.businessweek.com/bw_rss/hybrids'),
- (u'Europe', u'http://www.businessweek.com/rss/europe.rss'),
- (u'Auto Reviews', u'http://rss.businessweek.com/bw_rss/autoreviews'),
- (u'Innovation & Design', u'http://www.businessweek.com/rss/innovate.rss'),
- (u'Architecture', u'http://www.businessweek.com/rss/architecture.rss'),
- (u'Brand Equity', u'http://www.businessweek.com/rss/brandequity.rss'),
- (u'Auto Design', u'http://www.businessweek.com/rss/carbuff.rss'),
- (u'Game Room', u'http://rss.businessweek.com/bw_rss/gameroom'),
- (u'Technology', u'http://www.businessweek.com/rss/technology.rss'),
- (u'Investing', u'http://rss.businessweek.com/bw_rss/investor'),
- (u'Small Business', u'http://www.businessweek.com/rss/smallbiz.rss'),
- (u'Careers', u'http://rss.businessweek.com/bw_rss/careers'),
- (u'B-Schools', u'http://www.businessweek.com/rss/bschools.rss'),
- (u'Magazine Selections', u'http://www.businessweek.com/rss/magazine.rss'),
- (u'CEO Guide to Tech', u'http://www.businessweek.com/rss/ceo_guide_tech.rss'),
+ (u'Top Stories', u'http://www.businessweek.com/feeds/most-popular.rss'),
]
- def get_article_url(self, article):
- url = article.get('guid', None)
- if 'podcasts' in url:
- return None
- if 'surveys' in url:
- return None
- if 'images' in url:
- return None
- if 'feedroom' in url:
- return None
- if '/magazine/toc/' in url:
- return None
- rurl, sep, rest = url.rpartition('?')
- if rurl:
- return rurl
- return rest
-
def print_version(self, url):
- if '/news/' in url or '/blog/ in url':
- return url
- rurl = url.replace('http://www.businessweek.com/','http://www.businessweek.com/print/')
- return rurl.replace('/investing/','/investor/')
+ soup = self.index_to_soup(url)
+ prntver = soup.find('li', attrs={'class':'print tracked'})
+ rurl = prntver.find('a', href=True)['href']
+ return rurl
+
- def preprocess_html(self, soup):
- for item in soup.findAll(style=True):
- del item['style']
- for alink in soup.findAll('a'):
- if alink.string is not None:
- tstr = alink.string
- alink.replaceWith(tstr)
- return soup
-
diff --git a/recipes/bwmagazine2.recipe b/recipes/bwmagazine2.recipe
index 608c046d07..d02efc2861 100644
--- a/recipes/bwmagazine2.recipe
+++ b/recipes/bwmagazine2.recipe
@@ -1,3 +1,4 @@
+import re
from calibre.web.feeds.recipes import BasicNewsRecipe
from collections import OrderedDict
@@ -39,7 +40,7 @@ class BusinessWeekMagazine(BasicNewsRecipe):
title=self.tag_to_string(div.a).strip()
url=div.a['href']
soup0 = self.index_to_soup(url)
- urlprint=soup0.find('li', attrs={'class':'print tracked'}).a['href']
+ urlprint=soup0.find('a', attrs={'href':re.compile('.*printer.*')})['href']
articles.append({'title':title, 'url':urlprint, 'description':'', 'date':''})
@@ -56,7 +57,7 @@ class BusinessWeekMagazine(BasicNewsRecipe):
title=self.tag_to_string(div.a).strip()
url=div.a['href']
soup0 = self.index_to_soup(url)
- urlprint=soup0.find('li', attrs={'class':'print tracked'}).a['href']
+ urlprint=soup0.find('a', attrs={'href':re.compile('.*printer.*')})['href']
articles.append({'title':title, 'url':urlprint, 'description':desc, 'date':''})
if articles:
diff --git a/recipes/countryfile.recipe b/recipes/countryfile.recipe
index 86769b78cd..717f81afce 100644
--- a/recipes/countryfile.recipe
+++ b/recipes/countryfile.recipe
@@ -7,13 +7,14 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
#cover_url = 'http://www.countryfile.com/sites/default/files/imagecache/160px_wide/cover/2_1.jpg'
__author__ = 'Dave Asbury'
description = 'The official website of Countryfile Magazine'
- # last updated 8/12/12
+ # last updated 19/10/12
language = 'en_GB'
oldest_article = 30
max_articles_per_feed = 25
remove_empty_feeds = True
no_stylesheets = True
auto_cleanup = True
+ compress_news_images = True
ignore_duplicate_articles = {'title', 'url'}
#articles_are_obfuscated = True
#article_already_exists = False
diff --git a/recipes/daily_mirror.recipe b/recipes/daily_mirror.recipe
index bff337bcf7..91a274ab8c 100644
--- a/recipes/daily_mirror.recipe
+++ b/recipes/daily_mirror.recipe
@@ -13,9 +13,9 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
masthead_url = 'http://www.nmauk.co.uk/nma/images/daily_mirror.gif'
-
+ compress_news_images = True
oldest_article = 1
- max_articles_per_feed = 1
+ max_articles_per_feed = 12
remove_empty_feeds = True
remove_javascript = True
no_stylesheets = True
diff --git a/recipes/diario_de_noticias.recipe b/recipes/diario_de_noticias.recipe
new file mode 100644
index 0000000000..4ba7c6f7e5
--- /dev/null
+++ b/recipes/diario_de_noticias.recipe
@@ -0,0 +1,23 @@
+# vim:fileencoding=UTF-8
+
+from __future__ import unicode_literals
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1365070687(BasicNewsRecipe):
+ title ='Diário de Notícias'
+ oldest_article = 7
+ language = 'pt'
+ __author__ = 'Jose Pinto'
+ max_articles_per_feed = 100
+ keep_only_tags = [dict(name='div', attrs={'id':'cln-esqmid'}) ]
+ remove_tags = [ dict(name='table', attrs={'class':'TabFerramentasInf'}) ]
+
+ feeds = [(u'Portugal', u'http://feeds.dn.pt/DN-Portugal'),
+ (u'Globo', u'http://feeds.dn.pt/DN-Globo'),
+ (u'Economia', u'http://feeds.dn.pt/DN-Economia'),
+ (u'Ci\xeancia', u'http://feeds.dn.pt/DN-Ciencia'),
+ (u'Artes', u'http://feeds.dn.pt/DN-Artes'),
+ (u'TV & Media', u'http://feeds.dn.pt/DN-Media'),
+ (u'Opini\xe3o', u'http://feeds.dn.pt/DN-Opiniao'),
+ (u'Pessoas', u'http://feeds.dn.pt/DN-Pessoas')
+ ]
diff --git a/recipes/dzial_zagraniczny.recipe b/recipes/dzial_zagraniczny.recipe
new file mode 100644
index 0000000000..1b8453dd40
--- /dev/null
+++ b/recipes/dzial_zagraniczny.recipe
@@ -0,0 +1,27 @@
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__author__ = 'teepel '
+
+'''
+dzialzagraniczny.pl
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class dzial_zagraniczny(BasicNewsRecipe):
+ title = u'Dział Zagraniczny'
+ __author__ = 'teepel '
+ language = 'pl'
+ description = u'Polskiego czytelnika to nie interesuje'
+ INDEX = 'http://dzialzagraniczny.pl'
+ extra_css = 'img {display: block;}'
+ oldest_article = 7
+ cover_url = 'https://fbcdn-profile-a.akamaihd.net/hprofile-ak-prn1/c145.5.160.160/559442_415653975115959_2126205128_n.jpg'
+ max_articles_per_feed = 100
+ remove_empty_feeds = True
+ remove_javascript = True
+ no_stylesheets = True
+ use_embedded_content = True
+
+ feeds = [(u'Dział zagraniczny', u'http://feeds.feedburner.com/dyndns/UOfz')]
diff --git a/recipes/economia.recipe b/recipes/economia.recipe
new file mode 100644
index 0000000000..249125b76f
--- /dev/null
+++ b/recipes/economia.recipe
@@ -0,0 +1,17 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1314326622(BasicNewsRecipe):
+ title = u'Economia'
+ __author__ = 'Manish Bhattarai'
+ description = 'Economia - Intelligence & Insight for ICAEW Members'
+ language = 'en_GB'
+ oldest_article = 7
+ max_articles_per_feed = 25
+ masthead_url = 'http://economia.icaew.com/~/media/Images/Design%20Images/Economia_Red_website.ashx'
+ cover_url = 'http://economia.icaew.com/~/media/Images/Design%20Images/Economia_Red_website.ashx'
+ no_stylesheets = True
+ remove_empty_feeds = True
+ remove_tags_before = dict(id='content')
+ remove_tags_after = dict(id='stars-wrapper')
+ remove_tags = [dict(attrs={'class':['floatR', 'sharethis', 'rating clearfix']})]
+ feeds = [(u'News', u'http://feedity.com/icaew-com/VlNTVFRa.rss'),(u'Business', u'http://feedity.com/icaew-com/VlNTVFtS.rss'),(u'People', u'http://feedity.com/icaew-com/VlNTVFtX.rss'),(u'Opinion', u'http://feedity.com/icaew-com/VlNTVFtW.rss'),(u'Finance', u'http://feedity.com/icaew-com/VlNTVFtV.rss')]
diff --git a/recipes/el_diplo.recipe b/recipes/el_diplo.recipe
index b9ef8268e1..7827cbbdd7 100644
--- a/recipes/el_diplo.recipe
+++ b/recipes/el_diplo.recipe
@@ -26,7 +26,7 @@ class ElDiplo_Recipe(BasicNewsRecipe):
title = u'El Diplo'
__author__ = 'Tomas Di Domenico'
description = 'Publicacion mensual de Le Monde Diplomatique, edicion Argentina'
- langauge = 'es_AR'
+ language = 'es_AR'
needs_subscription = True
auto_cleanup = True
diff --git a/recipes/equipped.recipe b/recipes/equipped.recipe
new file mode 100644
index 0000000000..af74c10523
--- /dev/null
+++ b/recipes/equipped.recipe
@@ -0,0 +1,29 @@
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__author__ = 'teepel , Artur Stachecki '
+
+'''
+equipped.pl
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+class equipped(BasicNewsRecipe):
+ title = u'Equipped'
+ __author__ = 'teepel '
+ language = 'pl'
+ description = u'Wiadomości z equipped.pl'
+ INDEX = 'http://equipped.pl'
+ extra_css = '.alignleft {float:left; margin-right:5px;}'
+ oldest_article = 7
+ max_articles_per_feed = 100
+ remove_empty_feeds = True
+ simultaneous_downloads = 5
+ remove_javascript = True
+ no_stylesheets = True
+ use_embedded_content = False
+ #keep_only_tags = [dict(name='article')]
+ #remove_tags = [dict(id='disqus_thread')]
+ #remove_tags_after = [dict(id='disqus_thread')]
+
+ feeds = [(u'Equipped', u'http://feeds.feedburner.com/Equippedpl?format=xml')]
diff --git a/recipes/esensja_(rss).recipe b/recipes/esensja_(rss).recipe
index af23ea58a9..0afa2b0d07 100644
--- a/recipes/esensja_(rss).recipe
+++ b/recipes/esensja_(rss).recipe
@@ -12,12 +12,6 @@ class EsensjaRSS(BasicNewsRecipe):
language = 'pl'
encoding = 'utf-8'
INDEX = 'http://www.esensja.pl'
- extra_css = '''.t-title {font-size: x-large; font-weight: bold; text-align: left}
- .t-author {font-size: x-small; text-align: left}
- .t-title2 {font-size: x-small; font-style: italic; text-align: left}
- .text {font-size: small; text-align: left}
- .annot-ref {font-style: italic; text-align: left}
- '''
cover_url = ''
masthead_url = 'http://esensja.pl/img/wrss.gif'
use_embedded_content = False
diff --git a/recipes/film_org_pl.recipe b/recipes/film_org_pl.recipe
index fa0a69912b..4d4ba23e88 100644
--- a/recipes/film_org_pl.recipe
+++ b/recipes/film_org_pl.recipe
@@ -1,20 +1,54 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import Comment
import re
class FilmOrgPl(BasicNewsRecipe):
- title = u'Film.org.pl'
- __author__ = 'fenuks'
- description = u"Recenzje, analizy, artykuły, rankingi - wszystko o filmie dla miłośników kina. Opisy efektów specjalnych, wersji reżyserskich, remake'ów, sequeli. No i forum filmowe. Jedne z największych w Polsce."
- category = 'film'
- language = 'pl'
- extra_css = '.alignright {float:right; margin-left:5px;} .alignleft {float:left; margin-right:5px;}'
+ title = u'Film.org.pl'
+ __author__ = 'fenuks'
+ description = u"Recenzje, analizy, artykuły, rankingi - wszystko o filmie dla miłośników kina. Opisy efektów specjalnych, wersji reżyserskich, remake'ów, sequeli. No i forum filmowe. Jedne z największych w Polsce."
+ category = 'film'
+ language = 'pl'
+ extra_css = '.alignright {float:right; margin-left:5px;} .alignleft {float:left; margin-right:5px;} .recenzja-title {font-size: 150%; margin-top: 5px; margin-bottom: 5px;}'
cover_url = 'http://film.org.pl/wp-content/themes/KMF/images/logo_kmf10.png'
ignore_duplicate_articles = {'title', 'url'}
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True
+ remove_javascript = True
remove_empty_feeds = True
- use_embedded_content = True
- preprocess_regexps = [(re.compile(ur'Przeczytaj także:
.*', re.IGNORECASE|re.DOTALL), lambda m: '