sync with Kovid's branch

2025-08-30 23:00:21 -04:00 · 2013-04-05 23:02:22 +02:00 · 2013-04-05 23:02:22 +02:00 · 22848698a5
commit 22848698a5
parent 9f5a125480 fa47afe5a6
285 changed files with 74220 additions and 47278 deletions
--- a/.bzrignore
+++ b/.bzrignore
@ -40,6 +40,7 @@ recipes/.gitignore
 recipes/README.md
 recipes/icon_checker.py
 recipes/readme_updater.py
+recipes/garfield.recipe
 recipes/katalog_egazeciarz.recipe
 recipes/tv_axnscifi.recipe
 recipes/tv_comedycentral.recipe
@ -63,6 +64,7 @@ recipes/tv_tvppolonia.recipe
 recipes/tv_tvpuls.recipe
 recipes/tv_viasathistory.recipe
 recipes/icons/katalog_egazeciarz.png
+recipes/icons/garfield.png
 recipes/icons/tv_axnscifi.png
 recipes/icons/tv_comedycentral.png
 recipes/icons/tv_discoveryscience.png
--- a/Changelog.yaml
+++ b/Changelog.yaml
@ -1,3 +1,4 @@
+# vim:fileencoding=UTF-8:ts=2:sw=2:sta:et:sts=2:ai
 # Each release can have new features and bug fixes. Each of which
 # must have a title and can optionally have linked tickets and a description.
 # In addition they can have a type field which defaults to minor, but should be major
@ -19,6 +20,157 @@
 #   new recipes:
 #     - title: 

+- version: 0.9.26
+  date: 2013-04-05
+
+  new features:
+    - title: "PDF Output: Allow using templates to create arbitrary headers and footers. Look under PDF Output in the conversion dialog for this feature."
+
+    - title: "ToC Editor: Allow generating the ToC directly from individual files inside the ebook. Useful for EPUBs that have individual chapters in single files."
+      tickets: [1163520]
+
+    - title: "ToC Editor: Add buttons to indent/unindent the current entry"
+
+    - title: "ToC Editor: Right-click menu to perform various useful actions on entries in the ToC"
+
+    - title: "Column icons: Allow use of wide images as column icons"
+
+    - title: "Add USB ids for the Palm Pre2 and Samsung Galaxy phone to the device drivers"
+      tickets: [1162293,1163115]
+ 
+  bug fixes:
+    - title: "PDF Output: Fix generating page numbers causing links to not work."
+      tickets: [1162573]
+
+    - title: "Wrong filename output in error message when 'Guide reference not found'"
+      tickets: [1163659]
+
+    - title: "Get Books: Update Amazon, Barnes & Noble, Waterstones and Gutenberg store plugins for website change"
+
+    - title: "PDF Output: Fix 1 pixel wide left and top margins on the cover page for some PDF conversions due to incorrect rounding."
+      tickets: [1162054]
+
+    - title: "ToC Editor: Fix drag and drop of multiple items resulting in the dropped items being in random order sometimes."
+      tickets: [1161999]
+
+  improved recipes:
+    - Financial Times UK
+    - Sing Tao Daily
+    - Apple Daily
+    - A List Apart
+    - Business Week
+    - Harpers printed edition
+    - Harvard Business Review
+
+  new recipes:
+    - title: AM730
+      author: Eddie Lau
+
+    - title: Arret sur images 
+      author: Francois D
+
+    - title: Diario de Noticias
+      author: Jose Pinto
+
+- version: 0.9.25
+  date: 2013-03-29
+
+  new features:
+    - title: "Automatic adding: When checking for duplicates is enabled, use the same duplicates found dialog as is used during manual adding."
+      tickets: [1160914]
+
+    - title: "ToC Editor: Allow searching to find a location quickly when browsing through the book to select a location for a ToC item"
+
+    - title: "ToC Editor: Add a button to quickly flatten the entire table of contents"
+
+    - title: "Conversion: When converting a single book to EPUB or AZW3, add an option to automatically launch the Table of Contents editor after the conversion completes. Found under the Table of Contents section of the conversion dialog."
+ 
+  bug fixes:
+    - title: "calibredb: Nicer error messages when user provides invalid input"
+      tickets: [1160452,1160631]
+
+    - title: "News download: Always use the .jpg extension for jpeg images as apparently Moon+ Reader cannot handle .jpeg"
+
+    - title: "Fix Book Details popup keyboard navigation doesn't work on a Mac"
+      tickets: [1159610]
+
+    - title: "Fix a regression that caused the case of the book files to not be changed when changing the case of the title/author on case insensitive filesystems"
+
+  improved recipes:
+    - RTE news
+    - Various Polish news sources
+    - Psychology Today
+    - Foreign Affairs
+    - History Today
+    - Harpers Magazine (printed edition)
+    - Business Week Magazine
+    - The Hindu
+    - Irish Times
+    - Le Devoir
+
+  new recipes:
+    - title: Fortune Magazine 
+      author: Rick Shang
+
+    - title: Eclipse Online 
+      author: Jim DeVona
+
+- version: 0.9.24
+  date: 2013-03-22
+
+  new features:
+    - title: "ToC Editor: Allow auto-generation of Table of Contents entries from headings and/or links in the book"
+
+    - title: "EPUB/MOBI Catalogs: Allow saving used settings as presets which can be loaded easily later."
+      tickets: [1155587]
+
+    - title: "Indicate which columns are custom columns when selecting columns in the Preferences"
+      tickets: [1158066]
+
+    - title: "News download: Add an option recipe authors can set to have calibre automatically reduce the size of downloaded images by lowering their quality"
+ 
+  bug fixes:
+    - title: "News download: Fix a regression in 0.9.23 that prevented oldest_article from working with some RSS feeds."
+
+    - title: "Conversion: handle the :before and :after pseudo CSS selectors correctly"
+
+    - title: "AZW3 Output: Handle the case of the <guide> reference to a ToC containing an anchor correctly."
+      tickets: [1158413]
+
+    - title: "BiBTeX catalogs: Fix ISBN not being output and the library_name field causing catalog generation to fail"
+      tickets: [1156432, 1158127]
+
+    - title: "Conversion: Add support for CSS stylesheets that wrap their rules inside a @media rule."
+      tickets: [1157345]
+
+    - title: "Cover browser: Fix scrolling not working for books after the 32678'th book in a large library."
+      tickets: [1153204]
+
+    - title: "Linux: Update bundled libmtp version"
+
+    - title: "Clear the Book details panel when the current search returns no matches."
+      tickets: [1153026]
+
+    - title: "Fix a regression that broke creation of advanced column coloring rules"
+      tickets: [1156291]
+
+    - title: "Amazon metadata download: Handle cover images loaded via javascript on the amazon.de site"
+
+    - title: "Nicer error message when exporting a generated csv catalog to a file open in another program on windows."
+      tickets: [1155539]
+
+    - title: "Fix ebook-convert -h showing ANSI escape codes in the windows command prompt"
+      tickets: [1158499]
+
+  improved recipes:
+    - Various Polish news sources
+    - kath.net
+    - Il Giornale
+    - Kellog Insight
+
+  new recipes:
+    - title: 
+
 - version: 0.9.23
  date: 2013-03-15

--- a/manual/conversion.rst
+++ b/manual/conversion.rst
@ -434,6 +434,18 @@ a number of older formats either do not support a metadata based Table of Conten
 documents do not have one. In these cases, the options in this section can help you automatically
 generate a Table of Contents in the converted ebook, based on the actual content in the input document.

+.. note:: Using these options can be a little challenging to get exactly right.
+    If you prefer creating/editing the Table of Contents by hand, convert to
+    the EPUB or AZW3 formats and select the checkbox at the bottom of the
+    screen that says 
+    :guilabel:`Manually fine-tune the Table of Contents after conversion`. 
+    This will launch the ToC Editor tool after the conversion. It allows you to
+    create entries in the Table of Contents by simply clicking the place in the
+    book where you want the entry to point. You can also use the ToC Editor by
+    itself, without doing a conversion. Go to :guilabel:`Preferences->Toolbars`
+    and add the ToC Editor to the main toolbar. Then just select the book you
+    want to edit and click the ToC Editor button.
+
 The first option is :guilabel:`Force use of auto-generated Table of Contents`. By checking this option
 you can have |app| override any Table of Contents found in the metadata of the input document with the
 auto generated one. 
@ -456,7 +468,7 @@ For example, to remove all entries titles "Next" or "Previous" use::

    Next|Previous

-Finally, the :guilabel:`Level 1,2,3 TOC` options allow you to create a sophisticated multi-level Table of Contents.
+The :guilabel:`Level 1,2,3 TOC` options allow you to create a sophisticated multi-level Table of Contents.
 They are XPath expressions that match tags in the intermediate XHTML produced by the conversion pipeline. See the 
 :ref:`conversion-introduction` for how to get access to this XHTML. Also read the :ref:`xpath-tutorial`, to learn
 how to construct XPath expressions. Next to each option is a button that launches a wizard to help with the creation
@ -738,8 +750,61 @@ If this property is detected by |app|, the following custom properties are recog
    opf.series
    opf.seriesindex

-In addition to this, you can specify the picture to use as the cover by naming it ``opf.cover`` (right click, Picture->Options->Name) in the ODT. If no picture with this name is found, the 'smart' method is used.
-As the cover detection might result in double covers in certain output formats, the process will remove the paragraph (only if the only content is the cover!) from the document. But this works only with the named picture!
+In addition to this, you can specify the picture to use as the cover by naming
+it ``opf.cover`` (right click, Picture->Options->Name) in the ODT. If no
+picture with this name is found, the 'smart' method is used.  As the cover
+detection might result in double covers in certain output formats, the process
+will remove the paragraph (only if the only content is the cover!) from the
+document. But this works only with the named picture!

 To disable cover detection you can set the custom property ``opf.nocover`` ('Yes or No' type) to Yes in advanced mode.

+Converting to PDF
+~~~~~~~~~~~~~~~~~~~
+
+The first, most important, setting to decide on when converting to PDF is the page
+size. By default, |app| uses a page size defined by the current
+:guilabel:`Output profile`. So if your output profile is set to Kindle, |app|
+will create a PDF with a page size suitable for viewing on the small Kindle
+screen. However, if you view this PDF file on a computer screen, then it will
+appear to have too large fonts. To create "normal" sized PDFs, use the override
+page size option under :guilabel:`PDF Output` in the conversion dialog.
+
+You can insert arbitrary headers and footers on each page of the PDF by
+specifying header and footer templates. Templates are just snippets of HTML
+code that get rendered in the header and footer locations. For example, to
+display page numbers centered at the bottom of every page, in green, use the following
+footer template::
+
+    <p style="text-align:center; color:green">Page _PAGENUM_</p>
+
+|app| will automatically replace _PAGENUM_ with the current page number. You
+can even put different content on even and odd pages, for example the following
+header template will show the title on odd pages and the author on even pages::
+
+    <p style="text-align:right"><span class="even_page">_AUTHOR_</span><span class="odd_page"><i>_TITLE_</i></span></p>
+
+|app| will automatically replace _TITLE_ and _AUTHOR_ with the title and author
+of the document being converted.  You can also display text at the left and
+right edges and change the font size, as demonstrated with this header
+template::
+
+    <div style="font-size:x-small"><p style="float:left">_TITLE_</p><p style="float:right;"><i>_AUTHOR_</i></p></div>
+
+This will display the title at the left and the author at the right, in a font
+size smaller than the main text.
+
+Finally, you can also use the current section in templates, as shown below::
+
+    <p style="text-align:right">_SECTION_</p>
+
+_SECTION_ is replaced by whatever the name of the current section is. These
+names are taken from the metadata Table of Contents in the document (the PDF
+Outline). If the document has no table of contents then it will be replaced by
+empty text. If a single PDF page has multiple sections, the first section on
+the page will be used.
+
+.. note:: When adding headers and footers make sure you set the page top and
+    bottom margins to large enough values, under the Page Setup section of the
+    conversion dialog.
+
--- a/manual/faq.rst
+++ b/manual/faq.rst
@ -87,7 +87,9 @@ this bug.

 How do I convert a collection of HTML files in a specific order?
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-In order to convert a collection of HTML files in a specific oder, you have to create a table of contents file. That is, another HTML file that contains links to all the other files in the desired order. Such a file looks like::
+In order to convert a collection of HTML files in a specific order, you have to
+create a table of contents file. That is, another HTML file that contains links
+to all the other files in the desired order. Such a file looks like::

   <html>
      <body>
@ -102,19 +104,36 @@ In order to convert a collection of HTML files in a specific oder, you have to c
      </body>
   </html>

-Then just add this HTML file to the GUI and use the convert button to create your ebook. 
+Then, just add this HTML file to the GUI and use the convert button to create
+your ebook. You can use the options in the Table of Contents section in the
+conversion dialog to control how the Table of Contents is generated.

-.. note:: By default, when adding HTML files, |app| follows links in the files in *depth first* order. This means that if file A.html links to B.html and C.html and D.html, but B.html also links to D.html, then the files will be in the order A.html, B.html, D.html, C.html. If instead you want the order to be A.html, B.html, C.html, D.html then you must tell |app| to add your files in *breadth first* order. Do this by going to Preferences->Plugins and customizing the HTML to ZIP plugin.
+.. note:: By default, when adding HTML files, |app| follows links in the files
+    in *depth first* order. This means that if file A.html links to B.html and
+    C.html and D.html, but B.html also links to D.html, then the files will be
+    in the order A.html, B.html, D.html, C.html. If instead you want the order
+    to be A.html, B.html, C.html, D.html then you must tell |app| to add your
+    files in *breadth first* order. Do this by going to Preferences->Plugins
+    and customizing the HTML to ZIP plugin.

 The EPUB I produced with |app| is not valid?
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

-|app| does not guarantee that an EPUB produced by it is valid. The only guarantee it makes is that if you feed it valid XHTML 1.1 + CSS 2.1 it will output a valid EPUB. |app| is designed for ebook consumers, not producers. It tries hard to ensure that EPUBs it produces actually work as intended on a wide variety of devices, a goal that is incompatible with producing valid EPUBs, and one that is far more important to the vast majority of its users. If you need a tool that always produces valid EPUBs, |app| is not for you.
+|app| does not guarantee that an EPUB produced by it is valid. The only
+guarantee it makes is that if you feed it valid XHTML 1.1 + CSS 2.1 it will
+output a valid EPUB. |app| is designed for ebook consumers, not producers. It
+tries hard to ensure that EPUBs it produces actually work as intended on a wide
+variety of devices, a goal that is incompatible with producing valid EPUBs, and
+one that is far more important to the vast majority of its users. If you need a
+tool that always produces valid EPUBs, |app| is not for you.

 How do I use some of the advanced features of the conversion tools?
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- You can get help on any individual feature of the converters by mousing over it in the GUI or running ``ebook-convert dummy.html .epub -h`` at a terminal. A good place to start is to look at the following demo files that demonstrate some of the advanced features:
-  * `html-demo.zip <http://calibre-ebook.com/downloads/html-demo.zip>`_
+You can get help on any individual feature of the converters by mousing over
+it in the GUI or running ``ebook-convert dummy.html .epub -h`` at a terminal.
+A good place to start is to look at the following demo file that demonstrates
+some of the advanced features:
+`html-demo.zip <http://calibre-ebook.com/downloads/html-demo.zip>`_


 Device Integration
@ -126,11 +145,11 @@ Device Integration

 What devices does |app| support?
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-|app| can directly connect to all the major (and most of the minor) ebook reading devices,
-smarthphones, tablets, etc.
-In addition, using the :guilabel:`Connect to folder` function you can use it with any ebook reader that exports itself as a USB disk. 
-You can even connect to Apple devices (via iTunes), using the :guilabel:`Connect to iTunes`
-function.
+|app| can directly connect to all the major (and most of the minor) ebook
+reading devices, smartphones, tablets, etc.  In addition, using the
+:guilabel:`Connect to folder` function you can use it with any ebook reader
+that exports itself as a USB disk.  You can even connect to Apple devices (via
+iTunes), using the :guilabel:`Connect to iTunes` function.

 .. _devsupport:

--- a/recipes/adventure_zone_pl.recipe
+++ b/recipes/adventure_zone_pl.recipe
@ -10,15 +10,15 @@ class Adventure_zone(BasicNewsRecipe):
    oldest_article = 20
    max_articles_per_feed = 100
    cover_url = 'http://www.adventure-zone.info/inne/logoaz_2012.png'
-    index='http://www.adventure-zone.info/fusion/'
+    index = 'http://www.adventure-zone.info/fusion/'
    use_embedded_content = False
    preprocess_regexps     = [(re.compile(r"<td class='capmain'>Komentarze</td>", re.IGNORECASE), lambda m: ''),
    (re.compile(r'</?table.*?>'), lambda match: ''),
    (re.compile(r'</?tbody.*?>'), lambda match: '')]
-    remove_tags_before= dict(name='td', attrs={'class':'main-bg'})
-    remove_tags= [dict(name='img', attrs={'alt':'Drukuj'})]
-    remove_tags_after= dict(id='comments')
-    extra_css              = '.main-bg{text-align: left;}  td.capmain{ font-size: 22px; }'
+    remove_tags_before = dict(name='td', attrs={'class':'main-bg'})
+    remove_tags = [dict(name='img', attrs={'alt':'Drukuj'})]
+    remove_tags_after = dict(id='comments')
+    extra_css              = '.main-bg{text-align: left;}  td.capmain{ font-size: 22px; } img.news-category {float: left; margin-right: 5px;}'
    feeds          = [(u'Nowinki', u'http://www.adventure-zone.info/fusion/feeds/news.php')]

    '''def get_cover_url(self):
@ -26,7 +26,7 @@ class Adventure_zone(BasicNewsRecipe):
        cover=soup.find(id='box_OstatninumerAZ')
        self.cover_url='http://www.adventure-zone.info/fusion/'+ cover.center.a.img['src']
        return getattr(self, 'cover_url', self.cover_url)'''
-        
+
    def populate_article_metadata(self, article, soup, first):
        result = re.search('(.+) - Adventure Zone', soup.title.string)
        if result:
@ -66,5 +66,3 @@ class Adventure_zone(BasicNewsRecipe):
            if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
                a['href']=self.index + a['href']
        return soup
-           
-            
--- a/recipes/am730.recipe
+++ b/recipes/am730.recipe
@ -0,0 +1,290 @@
+# vim:fileencoding=UTF-8
+from __future__ import unicode_literals
+__license__   = 'GPL v3'
+__copyright__ = '2013, Eddie Lau'
+__Date__ = ''
+__HiResImg__ = True
+
+'''
+Change Log:
+2013/03/30 -- first version
+'''
+
+from calibre import (__appname__, force_unicode, strftime)
+from calibre.utils.date import now as nowf
+import os, datetime, re
+from calibre.web.feeds.recipes import BasicNewsRecipe
+from contextlib import nested
+from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
+from calibre.ebooks.metadata.opf2 import OPFCreator
+from calibre.ebooks.metadata.toc import TOC
+from calibre.ebooks.metadata import MetaInformation
+from calibre.utils.localization import canonicalize_lang
+
+class AppleDaily(BasicNewsRecipe):
+    title          = u'AM730'
+    __author__     = 'Eddie Lau'
+    publisher      = 'AM730'
+    oldest_article = 1
+    max_articles_per_feed = 100
+    auto_cleanup = False
+    language = 'zh'
+    encoding = 'utf-8'
+    auto_cleanup = False
+    remove_javascript = True
+    use_embedded_content   = False
+    no_stylesheets = True
+    description = 'http://www.am730.com.hk'
+    category    = 'Chinese, News, Hong Kong'
+    masthead_url = 'http://www.am730.com.hk/images/logo.jpg'
+
+    extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} div[id=articleHeader] {font-size:200%; text-align:left; font-weight:bold;} photocaption {font-size:50%; margin-left:auto; margin-right:auto;}'
+    keep_only_tags = [dict(name='div', attrs={'id':'articleHeader'}),
+                      dict(name='div', attrs={'class':'thecontent wordsnap'}),
+                      dict(name='a', attrs={'class':'lightboximg'})]
+    remove_tags = [dict(name='img', attrs={'src':'/images/am730_article_logo.jpg'}),
+                   dict(name='img', attrs={'src':'/images/am_endmark.gif'})]
+
+    def get_dtlocal(self):
+        dt_utc = datetime.datetime.utcnow()
+        # convert UTC to local hk time - at HKT 6am, all news are available
+        return dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(6.0/24)
+
+    def get_fetchdate(self):
+        if __Date__ <> '':
+            return __Date__
+        else:
+            return self.get_dtlocal().strftime("%Y%m%d")
+
+    def get_fetchformatteddate(self):
+        if __Date__ <> '':
+            return __Date__[0:4]+'-'+__Date__[4:6]+'-'+__Date__[6:8]
+        else:
+            return self.get_dtlocal().strftime("%Y-%m-%d")
+
+    def get_fetchyear(self):
+        if __Date__ <> '':
+            return __Date__[0:4]
+        else:
+            return self.get_dtlocal().strftime("%Y")
+
+    def get_fetchmonth(self):
+        if __Date__ <> '':
+            return __Date__[4:6]
+        else:
+            return self.get_dtlocal().strftime("%m")
+
+    def get_fetchday(self):
+        if __Date__ <> '':
+            return __Date__[6:8]
+        else:
+            return self.get_dtlocal().strftime("%d")
+
+    # Note: does not work with custom date given by __Date__
+    def get_weekday(self):
+        return self.get_dtlocal().weekday()
+
+    def populate_article_metadata(self, article, soup, first):
+        if first and hasattr(self, 'add_toc_thumbnail'):
+            picdiv = soup.find('img')
+            if picdiv is not None:
+                self.add_toc_thumbnail(article,picdiv['src'])
+
+    def parse_index(self):
+        feeds = []
+        soup = self.index_to_soup('http://www.am730.com.hk/')
+        ul = soup.find(attrs={'class':'nav-section'})
+        sectionList = []
+        for li in ul.findAll('li'):
+            a = 'http://www.am730.com.hk/' + li.find('a', href=True).get('href', False)
+            title = li.find('a').get('title', False).strip()
+            sectionList.append((title, a))
+        for title, url in sectionList:
+            articles = self.parse_section(url)
+            if articles:
+                feeds.append((title, articles))
+        return feeds
+
+    def parse_section(self, url):
+        soup = self.index_to_soup(url)
+        items = soup.findAll(attrs={'style':'padding-bottom: 15px;'})
+        current_articles = []
+        for item in items:
+            a = item.find(attrs={'class':'t6 f14'}).find('a', href=True)
+            articlelink = 'http://www.am730.com.hk/' + a.get('href', True)
+            title = self.tag_to_string(a)
+            description = self.tag_to_string(item.find(attrs={'class':'t3 f14'}))
+            current_articles.append({'title': title, 'url': articlelink, 'description': description})
+        return current_articles
+
+    def preprocess_html(self, soup):
+        multia = soup.findAll('a')
+        for a in multia:
+            if not (a == None):
+                image = a.find('img')
+                if not (image == None):
+                    if __HiResImg__:
+                        image['src'] = image.get('src').replace('/thumbs/', '/')
+                    caption = image.get('alt')
+                    tag = Tag(soup, "photo", [])
+                    tag2 = Tag(soup, "photocaption", [])
+                    tag.insert(0, image)
+                    if not caption == None:
+                        tag2.insert(0, caption)
+                        tag.insert(1, tag2)
+                    a.replaceWith(tag)
+        return soup
+
+    def create_opf(self, feeds, dir=None):
+        if dir is None:
+            dir = self.output_dir
+        title = self.short_title()
+        if self.output_profile.periodical_date_in_title:
+            title += strftime(self.timefmt)
+        mi = MetaInformation(title, [__appname__])
+        mi.publisher = __appname__
+        mi.author_sort = __appname__
+        if self.publication_type:
+            mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
+        mi.timestamp = nowf()
+        article_titles, aseen = [], set()
+        for f in feeds:
+            for a in f:
+                if a.title and a.title not in aseen:
+                    aseen.add(a.title)
+                    article_titles.append(force_unicode(a.title, 'utf-8'))
+
+        mi.comments = self.description
+        if not isinstance(mi.comments, unicode):
+            mi.comments = mi.comments.decode('utf-8', 'replace')
+        mi.comments += ('\n\n' + _('Articles in this issue: ') + '\n' +
+                '\n\n'.join(article_titles))
+
+        language = canonicalize_lang(self.language)
+        if language is not None:
+            mi.language = language
+        # This one affects the pub date shown in kindle title
+        #mi.pubdate = nowf()
+        # now appears to need the time field to be > 12.00noon as well
+        mi.pubdate = datetime.datetime(int(self.get_fetchyear()), int(self.get_fetchmonth()), int(self.get_fetchday()), 12, 30, 0)
+        opf_path = os.path.join(dir, 'index.opf')
+        ncx_path = os.path.join(dir, 'index.ncx')
+
+        opf = OPFCreator(dir, mi)
+        # Add mastheadImage entry to <guide> section
+        mp = getattr(self, 'masthead_path', None)
+        if mp is not None and os.access(mp, os.R_OK):
+            from calibre.ebooks.metadata.opf2 import Guide
+            ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
+            ref.type = 'masthead'
+            ref.title = 'Masthead Image'
+            opf.guide.append(ref)
+
+        manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
+        manifest.append(os.path.join(dir, 'index.html'))
+        manifest.append(os.path.join(dir, 'index.ncx'))
+
+        # Get cover
+        cpath = getattr(self, 'cover_path', None)
+        if cpath is None:
+            pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
+            if self.default_cover(pf):
+                cpath =  pf.name
+        if cpath is not None and os.access(cpath, os.R_OK):
+            opf.cover = cpath
+            manifest.append(cpath)
+
+        # Get masthead
+        mpath = getattr(self, 'masthead_path', None)
+        if mpath is not None and os.access(mpath, os.R_OK):
+            manifest.append(mpath)
+
+        opf.create_manifest_from_files_in(manifest)
+        for mani in opf.manifest:
+            if mani.path.endswith('.ncx'):
+                mani.id = 'ncx'
+            if mani.path.endswith('mastheadImage.jpg'):
+                mani.id = 'masthead-image'
+
+        entries = ['index.html']
+        toc = TOC(base_path=dir)
+        self.play_order_counter = 0
+        self.play_order_map = {}
+
+
+        def feed_index(num, parent):
+            f = feeds[num]
+            for j, a in enumerate(f):
+                if getattr(a, 'downloaded', False):
+                    adir = 'feed_%d/article_%d/'%(num, j)
+                    auth = a.author
+                    if not auth:
+                        auth = None
+                    desc = a.text_summary
+                    if not desc:
+                        desc = None
+                    else:
+                        desc = self.description_limiter(desc)
+                    tt = a.toc_thumbnail if a.toc_thumbnail else None
+                    entries.append('%sindex.html'%adir)
+                    po = self.play_order_map.get(entries[-1], None)
+                    if po is None:
+                        self.play_order_counter += 1
+                        po = self.play_order_counter
+                    parent.add_item('%sindex.html'%adir, None,
+                            a.title if a.title else _('Untitled Article'),
+                            play_order=po, author=auth,
+                            description=desc, toc_thumbnail=tt)
+                    last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
+                    for sp in a.sub_pages:
+                        prefix = os.path.commonprefix([opf_path, sp])
+                        relp = sp[len(prefix):]
+                        entries.append(relp.replace(os.sep, '/'))
+                        last = sp
+
+                    if os.path.exists(last):
+                        with open(last, 'rb') as fi:
+                            src = fi.read().decode('utf-8')
+                        soup = BeautifulSoup(src)
+                        body = soup.find('body')
+                        if body is not None:
+                            prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last))))
+                            templ = self.navbar.generate(True, num, j, len(f),
+                                            not self.has_single_feed,
+                                            a.orig_url, __appname__, prefix=prefix,
+                                            center=self.center_navbar)
+                            elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
+                            body.insert(len(body.contents), elem)
+                            with open(last, 'wb') as fi:
+                                fi.write(unicode(soup).encode('utf-8'))
+        if len(feeds) == 0:
+            raise Exception('All feeds are empty, aborting.')
+
+        if len(feeds) > 1:
+            for i, f in enumerate(feeds):
+                entries.append('feed_%d/index.html'%i)
+                po = self.play_order_map.get(entries[-1], None)
+                if po is None:
+                    self.play_order_counter += 1
+                    po = self.play_order_counter
+                auth = getattr(f, 'author', None)
+                if not auth:
+                    auth = None
+                desc = getattr(f, 'description', None)
+                if not desc:
+                    desc = None
+                feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
+                    f.title, play_order=po, description=desc, author=auth))
+
+        else:
+            entries.append('feed_%d/index.html'%0)
+            feed_index(0, toc)
+
+        for i, p in enumerate(entries):
+            entries[i] = os.path.join(dir, p.replace('/', os.sep))
+        opf.create_spine(entries)
+        opf.set_toc(toc)
+
+        with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
+            opf.render(opf_file, ncx_file)
+
--- a/recipes/apple_daily.recipe
+++ b/recipes/apple_daily.recipe
@ -1,161 +1,275 @@
-# -*- coding: utf-8 -*-
-import re
+# vim:fileencoding=UTF-8
+from __future__ import unicode_literals
+__license__   = 'GPL v3'
+__copyright__ = '2013, Eddie Lau'
+__Date__ = ''
+
+from calibre import (__appname__, force_unicode, strftime)
+from calibre.utils.date import now as nowf
+import os, datetime, re
 from calibre.web.feeds.recipes import BasicNewsRecipe
+from contextlib import nested
+from calibre.ebooks.BeautifulSoup import BeautifulSoup
+from calibre.ebooks.metadata.opf2 import OPFCreator
+from calibre.ebooks.metadata.toc import TOC
+from calibre.ebooks.metadata import MetaInformation
+from calibre.utils.localization import canonicalize_lang

 class AppleDaily(BasicNewsRecipe):
-
-    title       = u'蘋果日報'
-    __author__  = u'蘋果日報'
-    __publisher__  = u'蘋果日報'
-    description = u'蘋果日報'
-    masthead_url = 'http://hk.apple.nextmedia.com/template/common/header/2009/images/atnextheader_logo_appledaily.gif'
-    language = 'zh_TW'
-    encoding = 'UTF-8'
-    timefmt = ' [%a, %d %b, %Y]'
-    needs_subscription = False
+    title          = u'蘋果日報 (香港)'
+    __author__     = 'Eddie Lau'
+    publisher      = '蘋果日報'
+    oldest_article = 1
+    max_articles_per_feed = 100
+    auto_cleanup = False
+    language = 'zh'
+    encoding = 'utf-8'
+    auto_cleanup = False
    remove_javascript = True
-    remove_tags_before = dict(name=['ul', 'h1'])
-    remove_tags_after  = dict(name='form')
-    remove_tags = [dict(attrs={'class':['articleTools', 'post-tools', 'side_tool', 'nextArticleLink clearfix']}),
-                dict(id=['footer', 'toolsRight', 'articleInline', 'navigation', 'archive', 'side_search', 'blog_sidebar', 'side_tool', 'side_index']),
-                dict(name=['script', 'noscript', 'style', 'form'])]
+    use_embedded_content   = False
    no_stylesheets = True
-    extra_css = '''
-    	@font-face {font-family: "uming", serif, sans-serif;  src: url(res:///usr/share/fonts/truetype/arphic/uming.ttc); }\n
-	    body {margin-right: 8pt; font-family: 'uming', serif;}
-        h1 {font-family: 'uming', serif, sans-serif}
-            '''
-    #extra_css = 'h1 {font: sans-serif large;}\n.byline {font:monospace;}'
+    description = 'http://hkm.appledaily.com/'
+    category    = 'Chinese, News, Hong Kong'
+    masthead_url = 'http://upload.wikimedia.org/wikipedia/zh/c/cf/AppleDailyLogo1.png'

-    preprocess_regexps = [
-       (re.compile(r'img.php?server=(?P<server>[^&]+)&path=(?P<path>[^&]+).*', re.DOTALL|re.IGNORECASE),
-        lambda match: 'http://' + match.group('server') + '/' + match.group('path')),
-    ]
+    extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} h1 {font-size:200%; text-align:left; font-weight:bold;} p[class=video-caption] {font-size:50%; margin-left:auto; margin-right:auto;}'
+    keep_only_tags = [dict(name='div', attrs={'id':'content-article'})]
+    remove_tags = [dict(name='div', attrs={'class':'prev-next-btn'}),
+                   dict(name='p', attrs={'class':'next'})]
+
+    def get_dtlocal(self):
+        dt_utc = datetime.datetime.utcnow()
+        # convert UTC to local hk time - at HKT 6am, all news are available
+        return dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(6.0/24)
+
+    def get_fetchdate(self):
+        if __Date__ <> '':
+            return __Date__
+        else:
+            return self.get_dtlocal().strftime("%Y%m%d")
+
+    def get_fetchformatteddate(self):
+        if __Date__ <> '':
+            return __Date__[0:4]+'-'+__Date__[4:6]+'-'+__Date__[6:8]
+        else:
+            return self.get_dtlocal().strftime("%Y-%m-%d")
+
+    def get_fetchyear(self):
+        if __Date__ <> '':
+            return __Date__[0:4]
+        else:
+            return self.get_dtlocal().strftime("%Y")
+
+    def get_fetchmonth(self):
+        if __Date__ <> '':
+            return __Date__[4:6]
+        else:
+            return self.get_dtlocal().strftime("%m")
+
+    def get_fetchday(self):
+        if __Date__ <> '':
+            return __Date__[6:8]
+        else:
+            return self.get_dtlocal().strftime("%d")
+
+    # Note: does not work with custom date given by __Date__
+    def get_weekday(self):
+        return self.get_dtlocal().weekday()

    def get_cover_url(self):
-        return 'http://hk.apple.nextmedia.com/template/common/header/2009/images/atnextheader_logo_appledaily.gif'
-
-
-    #def get_browser(self):
-        #br = BasicNewsRecipe.get_browser(self)
-        #if self.username is not None and self.password is not None:
-        #    br.open('http://www.nytimes.com/auth/login')
-        #    br.select_form(name='login')
-        #    br['USERID']   = self.username
-        #    br['PASSWORD'] = self.password
-        #    br.submit()
-        #return br
-
-    def preprocess_html(self, soup):
-        #process all the images
-        for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
-            iurl = tag['src']
-            #print 'checking image: ' + iurl
-
-            #img\.php?server\=(?P<server>[^&]+)&path=(?P<path>[^&]+)
-            p = re.compile(r'img\.php\?server=(?P<server>[^&]+)&path=(?P<path>[^&]+)', re.DOTALL|re.IGNORECASE)
-
-            m = p.search(iurl)
-
-            if m is not None:
-                iurl = 'http://' + m.group('server') + '/' + m.group('path')
-                #print 'working! new url: ' + iurl
-                tag['src'] = iurl
-            #else:
-                #print 'not good'
-
-        for tag in soup.findAll(lambda tag: tag.name.lower()=='a' and tag.has_key('href')):
-            iurl = tag['href']
-            #print 'checking image: ' + iurl
-
-            #img\.php?server\=(?P<server>[^&]+)&path=(?P<path>[^&]+)
-            p = re.compile(r'img\.php\?server=(?P<server>[^&]+)&path=(?P<path>[^&]+)', re.DOTALL|re.IGNORECASE)
-
-            m = p.search(iurl)
-
-            if m is not None:
-                iurl = 'http://' + m.group('server') + '/' + m.group('path')
-                #print 'working! new url: ' + iurl
-                tag['href'] = iurl
-            #else:
-                #print 'not good'
-
-        return soup
+        soup = self.index_to_soup('http://hkm.appledaily.com/')
+        cover = soup.find(attrs={'class':'top-news'}).get('src', False)
+        br = BasicNewsRecipe.get_browser(self)
+        try:
+            br.open(cover)
+        except:
+            cover = None
+        return cover

+    def populate_article_metadata(self, article, soup, first):
+        if first and hasattr(self, 'add_toc_thumbnail'):
+            picdiv = soup.find('img')
+            if picdiv is not None:
+                self.add_toc_thumbnail(article,picdiv['src'])

    def parse_index(self):
-        base = 'http://news.hotpot.hk/fruit'
-        soup = self.index_to_soup('http://news.hotpot.hk/fruit/index.php')
+        feeds = []
+        soup = self.index_to_soup('http://hkm.appledaily.com/')
+        ul = soup.find(attrs={'class':'menu'})
+        sectionList = []
+        for li in ul.findAll('li'):
+            a = 'http://hkm.appledaily.com/' + li.find('a', href=True).get('href', False)
+            title = li.find('a', text=True).strip()
+            if not title == u'動新聞':
+                sectionList.append((title, a))
+        for title, url in sectionList:
+            articles = self.parse_section(url)
+            if articles:
+                feeds.append((title, articles))
+        return feeds

-        #def feed_title(div):
-        #    return ''.join(div.findAll(text=True, recursive=False)).strip()
+    def parse_section(self, url):
+        soup = self.index_to_soup(url)
+        ul = soup.find(attrs={'class':'list'})
+        current_articles = []
+        for li in ul.findAll('li'):
+            a = li.find('a', href=True)
+            title = li.find('p', text=True).strip()
+            if a is not None:
+                current_articles.append({'title': title, 'url':'http://hkm.appledaily.com/' + a.get('href', False)})
+            pass
+        return current_articles

-        articles = {}
-        key = None
-        ans = []
-        for div in soup.findAll('li'):
-            key = div.find(text=True, recursive=True);
-            #if key == u'豪情':
-           #    continue;
+    def create_opf(self, feeds, dir=None):
+        if dir is None:
+            dir = self.output_dir
+        title = self.short_title()
+        if self.output_profile.periodical_date_in_title:
+            title += strftime(self.timefmt)
+        mi = MetaInformation(title, [__appname__])
+        mi.publisher = __appname__
+        mi.author_sort = __appname__
+        if self.publication_type:
+            mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
+        mi.timestamp = nowf()
+        article_titles, aseen = [], set()
+        for f in feeds:
+            for a in f:
+                if a.title and a.title not in aseen:
+                    aseen.add(a.title)
+                    article_titles.append(force_unicode(a.title, 'utf-8'))

-            print 'section=' + key
+        mi.comments = self.description
+        if not isinstance(mi.comments, unicode):
+            mi.comments = mi.comments.decode('utf-8', 'replace')
+        mi.comments += ('\n\n' + _('Articles in this issue: ') + '\n' +
+                '\n\n'.join(article_titles))

-            articles[key] = []
+        language = canonicalize_lang(self.language)
+        if language is not None:
+            mi.language = language
+        # This one affects the pub date shown in kindle title
+        #mi.pubdate = nowf()
+        # now appears to need the time field to be > 12.00noon as well
+        mi.pubdate = datetime.datetime(int(self.get_fetchyear()), int(self.get_fetchmonth()), int(self.get_fetchday()), 12, 30, 0)
+        opf_path = os.path.join(dir, 'index.opf')
+        ncx_path = os.path.join(dir, 'index.ncx')

-            ans.append(key)
+        opf = OPFCreator(dir, mi)
+        # Add mastheadImage entry to <guide> section
+        mp = getattr(self, 'masthead_path', None)
+        if mp is not None and os.access(mp, os.R_OK):
+            from calibre.ebooks.metadata.opf2 import Guide
+            ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
+            ref.type = 'masthead'
+            ref.title = 'Masthead Image'
+            opf.guide.append(ref)

-            a = div.find('a', href=True)
+        manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
+        manifest.append(os.path.join(dir, 'index.html'))
+        manifest.append(os.path.join(dir, 'index.ncx'))

-            if not a:
-                continue
+        # Get cover
+        cpath = getattr(self, 'cover_path', None)
+        if cpath is None:
+            pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
+            if self.default_cover(pf):
+                cpath =  pf.name
+        if cpath is not None and os.access(cpath, os.R_OK):
+            opf.cover = cpath
+            manifest.append(cpath)

-            url = base + '/' + a['href']
-            print 'url=' + url
+        # Get masthead
+        mpath = getattr(self, 'masthead_path', None)
+        if mpath is not None and os.access(mpath, os.R_OK):
+            manifest.append(mpath)

-            if not articles.has_key(key):
-                articles[key] = []
-            else:
-                # sub page
-                subSoup = self.index_to_soup(url)
+        opf.create_manifest_from_files_in(manifest)
+        for mani in opf.manifest:
+            if mani.path.endswith('.ncx'):
+                mani.id = 'ncx'
+            if mani.path.endswith('mastheadImage.jpg'):
+                mani.id = 'masthead-image'

-                for subDiv in subSoup.findAll('li'):
-                    subA = subDiv.find('a', href=True)
-                    subTitle = subDiv.find(text=True, recursive=True)
-                    subUrl = base + '/' + subA['href']
-
-                    print 'subUrl' + subUrl
-
-                    articles[key].append(
-                        dict(title=subTitle,
-                         url=subUrl,
-                         date='',
-                         description='',
-                         content=''))
+        entries = ['index.html']
+        toc = TOC(base_path=dir)
+        self.play_order_counter = 0
+        self.play_order_map = {}


-#             elif div['class'] in ['story', 'story headline']:
-#                 a = div.find('a', href=True)
-#                 if not a:
-#                     continue
-#                 url = re.sub(r'\?.*', '', a['href'])
-#                 url += '?pagewanted=all'
-#                 title = self.tag_to_string(a, use_alt=True).strip()
-#                 description = ''
-#                 pubdate = strftime('%a, %d %b')
-#                 summary = div.find(True, attrs={'class':'summary'})
-#                 if summary:
-#                     description = self.tag_to_string(summary, use_alt=False)
-#
-#                 feed = key if key is not None else 'Uncategorized'
-#                 if not articles.has_key(feed):
-#                     articles[feed] = []
-#                 if not 'podcasts' in url:
-#                     articles[feed].append(
-#                               dict(title=title, url=url, date=pubdate,
-#                                    description=description,
-#                                    content=''))
-#        ans = self.sort_index_by(ans, {'The Front Page':-1, 'Dining In, Dining Out':1, 'Obituaries':2})
-        ans = [(unicode(key), articles[key]) for key in ans if articles.has_key(key)]
-        return ans
+        def feed_index(num, parent):
+            f = feeds[num]
+            for j, a in enumerate(f):
+                if getattr(a, 'downloaded', False):
+                    adir = 'feed_%d/article_%d/'%(num, j)
+                    auth = a.author
+                    if not auth:
+                        auth = None
+                    desc = a.text_summary
+                    if not desc:
+                        desc = None
+                    else:
+                        desc = self.description_limiter(desc)
+                    tt = a.toc_thumbnail if a.toc_thumbnail else None
+                    entries.append('%sindex.html'%adir)
+                    po = self.play_order_map.get(entries[-1], None)
+                    if po is None:
+                        self.play_order_counter += 1
+                        po = self.play_order_counter
+                    parent.add_item('%sindex.html'%adir, None,
+                            a.title if a.title else _('Untitled Article'),
+                            play_order=po, author=auth,
+                            description=desc, toc_thumbnail=tt)
+                    last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
+                    for sp in a.sub_pages:
+                        prefix = os.path.commonprefix([opf_path, sp])
+                        relp = sp[len(prefix):]
+                        entries.append(relp.replace(os.sep, '/'))
+                        last = sp

+                    if os.path.exists(last):
+                        with open(last, 'rb') as fi:
+                            src = fi.read().decode('utf-8')
+                        soup = BeautifulSoup(src)
+                        body = soup.find('body')
+                        if body is not None:
+                            prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last))))
+                            templ = self.navbar.generate(True, num, j, len(f),
+                                            not self.has_single_feed,
+                                            a.orig_url, __appname__, prefix=prefix,
+                                            center=self.center_navbar)
+                            elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
+                            body.insert(len(body.contents), elem)
+                            with open(last, 'wb') as fi:
+                                fi.write(unicode(soup).encode('utf-8'))
+        if len(feeds) == 0:
+            raise Exception('All feeds are empty, aborting.')
+
+        if len(feeds) > 1:
+            for i, f in enumerate(feeds):
+                entries.append('feed_%d/index.html'%i)
+                po = self.play_order_map.get(entries[-1], None)
+                if po is None:
+                    self.play_order_counter += 1
+                    po = self.play_order_counter
+                auth = getattr(f, 'author', None)
+                if not auth:
+                    auth = None
+                desc = getattr(f, 'description', None)
+                if not desc:
+                    desc = None
+                feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
+                    f.title, play_order=po, description=desc, author=auth))
+
+        else:
+            entries.append('feed_%d/index.html'%0)
+            feed_index(0, toc)
+
+        for i, p in enumerate(entries):
+            entries[i] = os.path.join(dir, p.replace('/', os.sep))
+        opf.create_spine(entries)
+        opf.set_toc(toc)
+
+        with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
+            opf.render(opf_file, ncx_file)

--- a/recipes/arret_sur_images.recipe
+++ b/recipes/arret_sur_images.recipe
@ -0,0 +1,54 @@
+from __future__ import unicode_literals
+
+__license__ = 'WTFPL'
+__author__ = '2013, François D. <franek at chicour.net>'
+__description__ = 'Get some fresh news from Arrêt sur images'
+
+
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+class Asi(BasicNewsRecipe):
+
+    title       = 'Arrêt sur images'
+    __author__  = 'François D. (aka franek)'
+    description = 'Global news in french from news site "Arrêt sur images"'
+
+    oldest_article = 7.0
+    language = 'fr'
+    needs_subscription = True
+    max_articles_per_feed = 100
+
+    simultaneous_downloads = 1
+    timefmt = '[%a, %d %b %Y %I:%M +0200]'
+    cover_url = 'http://www.arretsurimages.net/images/header/menu/menu_1.png'
+
+    use_embedded_content = False
+    no_stylesheets = True
+    remove_javascript = True
+
+    feeds =  [
+        ('vite dit et gratuit', 'http://www.arretsurimages.net/vite-dit.rss'),
+        ('Toutes les chroniques', 'http://www.arretsurimages.net/chroniques.rss'),
+        ('Contenus et dossiers', 'http://www.arretsurimages.net/dossiers.rss'),
+    ]
+
+    conversion_options = { 'smarten_punctuation' : True }
+
+    remove_tags = [dict(id='vite-titre'), dict(id='header'), dict(id='wrap-connexion'), dict(id='col_right'), dict(name='div', attrs={'class':'bloc-chroniqueur-2'}), dict(id='footercontainer')]
+
+    def print_version(self, url):
+        return url.replace('contenu.php', 'contenu-imprimable.php')
+
+    def get_browser(self):
+        # Need to use robust HTML parser
+        br = BasicNewsRecipe.get_browser(self, use_robust_parser=True)
+        if self.username is not None and self.password is not None:
+            br.open('http://www.arretsurimages.net/index.php')
+            br.select_form(nr=0)
+            br.form.set_all_readonly(False)
+            br['redir'] = 'forum/login.php'
+            br['username'] = self.username
+            br['password'] = self.password
+            br.submit()
+        return br
+
--- a/recipes/astroflesz.recipe
+++ b/recipes/astroflesz.recipe
@ -2,12 +2,12 @@
 from calibre.web.feeds.news import BasicNewsRecipe

 class Astroflesz(BasicNewsRecipe):
-    title          = u'Astroflesz'
+    title = u'Astroflesz'
    oldest_article = 7
-    __author__        = 'fenuks'
-    description   = u'astroflesz.pl - to portal poświęcony astronomii. Informuje zarówno o aktualnych wydarzeniach i odkryciach naukowych, jak również zapowiada ciekawe zjawiska astronomiczne'
-    category       = 'astronomy'
-    language       = 'pl'
+    __author__ = 'fenuks'
+    description = u'astroflesz.pl - to portal poświęcony astronomii. Informuje zarówno o aktualnych wydarzeniach i odkryciach naukowych, jak również zapowiada ciekawe zjawiska astronomiczne'
+    category = 'astronomy'
+    language = 'pl'
    cover_url = 'http://www.astroflesz.pl/templates/astroflesz/images/logo/logo.png'
    ignore_duplicate_articles = {'title', 'url'}
    max_articles_per_feed = 100
@ -17,4 +17,11 @@ class Astroflesz(BasicNewsRecipe):
    keep_only_tags = [dict(id="k2Container")]
    remove_tags_after = dict(name='div', attrs={'class':'itemLinks'})
    remove_tags = [dict(name='div', attrs={'class':['itemLinks', 'itemToolbar', 'itemRatingBlock']})]
-    feeds          = [(u'Wszystkie', u'http://astroflesz.pl/?format=feed')]
+    feeds = [(u'Wszystkie', u'http://astroflesz.pl/?format=feed')]
+
+    def postprocess_html(self, soup, first_fetch):
+        t = soup.find(attrs={'class':'itemIntroText'})
+        if t:
+            for i in t.findAll('img'):
+                i['style'] = 'float: left; margin-right: 5px;'
+        return soup
--- a/recipes/badania_net.recipe
+++ b/recipes/badania_net.recipe
@ -1,17 +1,20 @@
 from calibre.web.feeds.news import BasicNewsRecipe
+import re
 class BadaniaNet(BasicNewsRecipe):
-    title          = u'badania.net'
+    title = u'badania.net'
    __author__ = 'fenuks'
-    description   = u'chcesz wiedzieć więcej?'
-    category       = 'science'
-    language       = 'pl'
+    description = u'chcesz wiedzieć więcej?'
+    category = 'science'
+    language = 'pl'
    cover_url = 'http://badania.net/wp-content/badanianet_green_transparent.png'
+    extra_css = '.alignleft {float:left; margin-right:5px;} .alignright {float:right; margin-left:5px;}'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
+    preprocess_regexps = [(re.compile(r"<h4>Tekst sponsoruje</h4>", re.IGNORECASE), lambda m: ''),]
    remove_empty_feeds = True
    use_embedded_content = False
    remove_tags = [dict(attrs={'class':['omc-flex-category', 'omc-comment-count', 'omc-single-tags']})]
    remove_tags_after = dict(attrs={'class':'omc-single-tags'})
    keep_only_tags = [dict(id='omc-full-article')]
-    feeds          = [(u'Psychologia', u'http://badania.net/category/psychologia/feed/'), (u'Technologie', u'http://badania.net/category/technologie/feed/'), (u'Biologia', u'http://badania.net/category/biologia/feed/'), (u'Chemia', u'http://badania.net/category/chemia/feed/'), (u'Zdrowie', u'http://badania.net/category/zdrowie/'), (u'Seks', u'http://badania.net/category/psychologia-ewolucyjna-tematyka-seks/feed/')]
+    feeds = [(u'Psychologia', u'http://badania.net/category/psychologia/feed/'), (u'Technologie', u'http://badania.net/category/technologie/feed/'), (u'Biologia', u'http://badania.net/category/biologia/feed/'), (u'Chemia', u'http://badania.net/category/chemia/feed/'), (u'Zdrowie', u'http://badania.net/category/zdrowie/'), (u'Seks', u'http://badania.net/category/psychologia-ewolucyjna-tematyka-seks/feed/')]
--- a/recipes/birmingham_post.recipe
+++ b/recipes/birmingham_post.recipe
@ -9,14 +9,14 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
    __author__ = 'Dave Asbury'
    cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/161987_9010212100_2035706408_n.jpg'
    oldest_article = 2
-    max_articles_per_feed = 12
+    max_articles_per_feed = 20
    linearize_tables = True
    remove_empty_feeds = True
    remove_javascript     = True
    no_stylesheets = True
    auto_cleanup = True
    language = 'en_GB'
-
+    compress_news_images = True
    cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/161987_9010212100_2035706408_n.jpg'

    masthead_url        = 'http://www.trinitymirror.com/images/birminghampost-logo.gif'
--- a/recipes/bwmagazine.recipe
+++ b/recipes/bwmagazine.recipe
@ -37,68 +37,15 @@ class BusinessWeek(BasicNewsRecipe):
                        , 'language'  : language
                        }

-    #remove_tags       = [
-                           #dict(attrs={'class':'inStory'})
-                          #,dict(name=['meta','link','iframe','base','embed','object','table','th','tr','td'])
-                          #,dict(attrs={'id':['inset','videoDisplay']})
-                        #]
-    #keep_only_tags    = [dict(name='div', attrs={'id':['story-body','storyBody']})]
-    remove_attributes = ['lang']
-    match_regexps     = [r'http://www.businessweek.com/.*_page_[1-9].*']
-

    feeds = [
-              (u'Top Stories', u'http://www.businessweek.com/topStories/rss/topStories.rss'),
-              (u'Top News'   , u'http://www.businessweek.com/rss/bwdaily.rss'              ),
-              (u'Asia', u'http://www.businessweek.com/rss/asia.rss'),
-              (u'Autos', u'http://www.businessweek.com/rss/autos/index.rss'),
-              (u'Classic Cars', u'http://rss.businessweek.com/bw_rss/classiccars'),
-              (u'Hybrids', u'http://rss.businessweek.com/bw_rss/hybrids'),
-              (u'Europe', u'http://www.businessweek.com/rss/europe.rss'),
-              (u'Auto Reviews', u'http://rss.businessweek.com/bw_rss/autoreviews'),
-              (u'Innovation & Design', u'http://www.businessweek.com/rss/innovate.rss'),
-              (u'Architecture', u'http://www.businessweek.com/rss/architecture.rss'),
-              (u'Brand Equity', u'http://www.businessweek.com/rss/brandequity.rss'),
-              (u'Auto Design', u'http://www.businessweek.com/rss/carbuff.rss'),
-              (u'Game Room', u'http://rss.businessweek.com/bw_rss/gameroom'),
-              (u'Technology', u'http://www.businessweek.com/rss/technology.rss'),
-              (u'Investing', u'http://rss.businessweek.com/bw_rss/investor'),
-              (u'Small Business', u'http://www.businessweek.com/rss/smallbiz.rss'),
-              (u'Careers', u'http://rss.businessweek.com/bw_rss/careers'),
-              (u'B-Schools', u'http://www.businessweek.com/rss/bschools.rss'),
-              (u'Magazine Selections', u'http://www.businessweek.com/rss/magazine.rss'),
-              (u'CEO Guide to Tech', u'http://www.businessweek.com/rss/ceo_guide_tech.rss'),
+              (u'Top Stories', u'http://www.businessweek.com/feeds/most-popular.rss'),
            ]

-    def get_article_url(self, article):
-        url = article.get('guid', None)
-        if 'podcasts' in url:
-            return None
-        if 'surveys' in url:
-            return None
-        if 'images' in url:
-            return None
-        if 'feedroom' in url:
-            return None
-        if '/magazine/toc/' in url:
-            return None
-        rurl, sep, rest = url.rpartition('?')
-        if rurl:
-           return rurl
-        return rest
-
    def print_version(self, url):
-        if '/news/' in url or '/blog/ in url':
-           return url
-        rurl = url.replace('http://www.businessweek.com/','http://www.businessweek.com/print/')
-        return rurl.replace('/investing/','/investor/')
+        soup = self.index_to_soup(url)
+        prntver = soup.find('li', attrs={'class':'print tracked'})
+        rurl = prntver.find('a', href=True)['href']
+        return rurl
+

-    def preprocess_html(self, soup):
-        for item in soup.findAll(style=True):
-            del item['style']
-        for alink in soup.findAll('a'):
-            if alink.string is not None:
-               tstr = alink.string
-               alink.replaceWith(tstr)
-        return soup
- 
--- a/recipes/bwmagazine2.recipe
+++ b/recipes/bwmagazine2.recipe
@ -11,8 +11,8 @@ class BusinessWeekMagazine(BasicNewsRecipe):
    category = 'news'
    encoding = 'UTF-8'
    keep_only_tags = [
-			dict(name='div', attrs={'id':'article_body_container'}),
-			]
+            dict(name='div', attrs={'id':'article_body_container'}),
+            ]
    remove_tags = [dict(name='ui'),dict(name='li'),dict(name='div', attrs={'id':['share-email']})]
    no_javascript = True
    no_stylesheets = True
@ -25,6 +25,7 @@ class BusinessWeekMagazine(BasicNewsRecipe):

        #Find date
        mag=soup.find('h2',text='Magazine')
+        self.log(mag)
        dates=self.tag_to_string(mag.findNext('h3'))
        self.timefmt = u' [%s]'%dates

@ -32,7 +33,7 @@ class BusinessWeekMagazine(BasicNewsRecipe):
        div0 = soup.find ('div', attrs={'class':'column left'})
        section_title = ''
        feeds = OrderedDict()
-        for div in div0.findAll('h4'):
+        for div in div0.findAll(['h4','h5']):
            articles = []
            section_title = self.tag_to_string(div.findPrevious('h3')).strip()
            title=self.tag_to_string(div.a).strip()
@ -48,7 +49,7 @@ class BusinessWeekMagazine(BasicNewsRecipe):
                feeds[section_title] += articles
        div1 = soup.find ('div', attrs={'class':'column center'})
        section_title = ''
-        for div in div1.findAll('h5'):
+        for div in div1.findAll(['h4','h5']):
            articles = []
            desc=self.tag_to_string(div.findNext('p')).strip()
            section_title = self.tag_to_string(div.findPrevious('h3')).strip()
--- a/recipes/ciekawostki_historyczne.recipe
+++ b/recipes/ciekawostki_historyczne.recipe
@ -11,7 +11,8 @@ class Ciekawostki_Historyczne(BasicNewsRecipe):
    masthead_url = 'http://ciekawostkihistoryczne.pl/wp-content/themes/Wordpress_Magazine/images/logo-ciekawostki-historyczne-male.jpg'
    cover_url = 'http://ciekawostkihistoryczne.pl/wp-content/themes/Wordpress_Magazine/images/logo-ciekawostki-historyczne-male.jpg'
    max_articles_per_feed = 100
-    oldest_article = 140000
+    extra_css = 'img.alignleft {float:left; margin-right:5px;} .alignright {float:right; margin-left:5px;}'
+    oldest_article = 12
    preprocess_regexps = [(re.compile(ur'Ten artykuł ma kilka stron.*?</fb:like>', re.DOTALL), lambda match: ''), (re.compile(ur'<h2>Zobacz też:</h2>.*?</ol>', re.DOTALL), lambda match: '')]
    no_stylesheets = True
    remove_empty_feeds = True
--- a/recipes/conowego_pl.recipe
+++ b/recipes/conowego_pl.recipe
@ -11,6 +11,7 @@ class CoNowegoPl(BasicNewsRecipe):
    oldest_article = 7
    max_articles_per_feed = 100
    INDEX = 'http://www.conowego.pl/'
+    extra_css = '.news-single-img {float:left; margin-right:5px;}'
    no_stylesheets = True
    remove_empty_feeds = True
    use_embedded_content = False
@ -35,7 +36,7 @@ class CoNowegoPl(BasicNewsRecipe):
                pagetext = soup2.find(attrs={'class':'ni_content'})
                pos = len(appendtag.contents)
                appendtag.insert(pos, pagetext)
-                
+
            comments = appendtag.findAll(text=lambda text:isinstance(text, Comment))
            for comment in comments:
                comment.extract()
--- a/recipes/countryfile.recipe
+++ b/recipes/countryfile.recipe
@ -7,13 +7,14 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
    #cover_url = 'http://www.countryfile.com/sites/default/files/imagecache/160px_wide/cover/2_1.jpg'
    __author__ = 'Dave Asbury'
    description           = 'The official website of Countryfile Magazine'
-    # last updated 8/12/12
+    # last updated 19/10/12
    language = 'en_GB'
    oldest_article = 30
    max_articles_per_feed = 25
    remove_empty_feeds = True
    no_stylesheets = True
    auto_cleanup = True
+    compress_news_images = True
    ignore_duplicate_articles = {'title', 'url'}
    #articles_are_obfuscated = True
    #article_already_exists = False
--- a/recipes/czas_gentlemanow.recipe
+++ b/recipes/czas_gentlemanow.recipe
@ -12,11 +12,13 @@ class CzasGentlemanow(BasicNewsRecipe):
    ignore_duplicate_articles = {'title', 'url'}
    oldest_article = 7
    max_articles_per_feed = 100
+    extra_css = '.gallery-item {float:left; margin-right: 10px; max-width: 20%;} .alignright {text-align: right; float:right; margin-left:5px;}\
+    .wp-caption-text {text-align: left;} img.aligncenter {display: block; margin-left: auto; margin-right: auto;} .alignleft {float: left; margin-right:5px;}'
    no_stylesheets = True
    remove_empty_feeds = True
    preprocess_regexps     = [(re.compile(u'<h3>Może Cię też zainteresować:</h3>'), lambda m: '')]
    use_embedded_content = False
    keep_only_tags = [dict(name='div', attrs={'class':'content'})]
-    remove_tags = [dict(attrs={'class':'meta_comments'}), dict(id=['comments', 'related_posts_thumbnails'])]
+    remove_tags = [dict(attrs={'class':'meta_comments'}), dict(id=['comments', 'related_posts_thumbnails', 'respond'])]
    remove_tags_after = dict(id='comments')
    feeds          = [(u'M\u0119ski \u015awiat', u'http://czasgentlemanow.pl/category/meski-swiat/feed/'), (u'Styl', u'http://czasgentlemanow.pl/category/styl/feed/'), (u'Vademecum Gentlemana', u'http://czasgentlemanow.pl/category/vademecum/feed/'), (u'Dom i rodzina', u'http://czasgentlemanow.pl/category/dom-i-rodzina/feed/'), (u'Honor', u'http://czasgentlemanow.pl/category/honor/feed/'), (u'Gad\u017cety Gentlemana', u'http://czasgentlemanow.pl/category/gadzety-gentlemana/feed/')]
--- a/recipes/daily_mirror.recipe
+++ b/recipes/daily_mirror.recipe
@ -13,9 +13,9 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):

    masthead_url = 'http://www.nmauk.co.uk/nma/images/daily_mirror.gif'

-
+    compress_news_images = True
    oldest_article = 1
-    max_articles_per_feed = 1
+    max_articles_per_feed = 12
    remove_empty_feeds = True
    remove_javascript     = True
    no_stylesheets = True
--- a/recipes/diario_de_noticias.recipe
+++ b/recipes/diario_de_noticias.recipe
@ -0,0 +1,23 @@
+# vim:fileencoding=UTF-8
+
+from __future__ import unicode_literals
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1365070687(BasicNewsRecipe):
+  # Section feeds of Diario de Noticias, all served from feeds.dn.pt.
+  title = 'Diário de Notícias'
+  __author__ = 'Jose Pinto'
+  language = 'pt'
+  oldest_article = 7
+  max_articles_per_feed = 100
+  keep_only_tags = [dict(name='div', attrs={'id': 'cln-esqmid'})]
+  remove_tags = [dict(name='table', attrs={'class': 'TabFerramentasInf'})]
+  feeds = [
+    (u'Portugal', u'http://feeds.dn.pt/DN-Portugal'),
+    (u'Globo', u'http://feeds.dn.pt/DN-Globo'),
+    (u'Economia', u'http://feeds.dn.pt/DN-Economia'),
+    (u'Ci\xeancia', u'http://feeds.dn.pt/DN-Ciencia'),
+    (u'Artes', u'http://feeds.dn.pt/DN-Artes'),
+    (u'TV & Media', u'http://feeds.dn.pt/DN-Media'),
+    (u'Opini\xe3o', u'http://feeds.dn.pt/DN-Opiniao'),
+    (u'Pessoas', u'http://feeds.dn.pt/DN-Pessoas')]
--- a/recipes/dobreprogamy.recipe
+++ b/recipes/dobreprogamy.recipe
@ -16,6 +16,7 @@ class Dobreprogramy_pl(BasicNewsRecipe):
    extra_css      = '.title {font-size:22px;}'
    oldest_article = 8
    max_articles_per_feed = 100
+    remove_attributes = ['style', 'width', 'height']  # 'remove_attrs' is not a BasicNewsRecipe attribute and was ignored
    preprocess_regexps = [(re.compile(ur'<div id="\S+360pmp4">Twoja przeglądarka nie obsługuje Flasha i HTML5 lub wyłączono obsługę JavaScript...</div>'), lambda match: '') ]
    keep_only_tags=[dict(attrs={'class':['news', 'entry single']})]
    remove_tags = [dict(attrs={'class':['newsOptions', 'noPrint', 'komentarze', 'tags  font-heading-master']}), dict(id='komentarze'), dict(name='iframe')]
@ -28,4 +29,11 @@ class Dobreprogramy_pl(BasicNewsRecipe):
        for a in soup('a'):
            if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
                a['href']=self.index + a['href']
+        for r in soup.findAll('iframe'):
+            r.parent.extract()
        return soup
+    def postprocess_html(self, soup, first_fetch):
+        # Remove every matched <span> whose .string is empty or None.
+        for span in [s for s in soup.findAll('span', text='') if not s.string]:
+            span.extract()
+        return soup
--- a/recipes/dzial_zagraniczny.recipe
+++ b/recipes/dzial_zagraniczny.recipe
@ -0,0 +1,27 @@
+#!/usr/bin/env  python
+
+__license__ = 'GPL v3'
+__author__ = 'teepel <teepel44@gmail.com>'
+
+'''
+dzialzagraniczny.pl
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class dzial_zagraniczny(BasicNewsRecipe):
+    # One FeedBurner feed; use_embedded_content is switched on for it.
+    title = u'Dział Zagraniczny'
+    description = u'Polskiego czytelnika to nie interesuje'
+    __author__ = 'teepel <teepel44@gmail.com>'
+    language = 'pl'
+    INDEX = 'http://dzialzagraniczny.pl'
+    cover_url = 'https://fbcdn-profile-a.akamaihd.net/hprofile-ak-prn1/c145.5.160.160/559442_415653975115959_2126205128_n.jpg'
+    feeds = [(u'Dział zagraniczny', u'http://feeds.feedburner.com/dyndns/UOfz')]
+    oldest_article = 7
+    max_articles_per_feed = 100
+    use_embedded_content = True
+    remove_empty_feeds = True
+    remove_javascript = True
+    no_stylesheets = True
+    extra_css = 'img {display: block;}'
--- a/recipes/dzieje_pl.recipe
+++ b/recipes/dzieje_pl.recipe
@ -9,6 +9,7 @@ class Dzieje(BasicNewsRecipe):
    category       = 'history'
    language       = 'pl'
    ignore_duplicate_articles = {'title', 'url'}
+    extra_css = '.imagecache-default {float:left; margin-right:20px;}'
    index = 'http://dzieje.pl'
    oldest_article = 8
    max_articles_per_feed = 100
--- a/recipes/eclipseonline.recipe
+++ b/recipes/eclipseonline.recipe
@ -0,0 +1,38 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+class EclipseOnline(BasicNewsRecipe):
+    # Recipe for Eclipse Online, read from the Night Shade Books category
+    # feed listed at the bottom of this class.
+
+    title = u'Eclipse Online'
+    description = u'"Where strange and wonderful things happen, where reality is eclipsed for a little while with something magical and new." Eclipse Online is edited by Jonathan Strahan and published online by Night Shade Books. http://www.nightshadebooks.com/category/eclipse/'
+    publication_type = 'magazine'
+    language = 'en'
+    __author__ = u'Jim DeVona'
+    __version__ = '1.0'
+
+    # Maximum age, in days, of the posts to retrieve. The default of 32 is
+    # meant to pair with a "days of month = 1" schedule, giving "monthly
+    # issues" of Eclipse Online. Raising it includes older posts, but the
+    # RSS feed currently carries only the 10 most recent posts, so that is
+    # the effective maximum.
+    oldest_article = 32
+
+    # The Eclipse Online logo serves as the cover for now; disable this line
+    # to let Calibre generate a default cover that includes the date.
+    cover_url = 'http://www.nightshadebooks.com/wp-content/uploads/2012/10/Eclipse-Logo.jpg'
+
+    # Keep the "post" div that holds the story and drop the redundant
+    # metadata spans inside it.
+    keep_only_tags = [dict(name='div', attrs={'class': lambda css: css and 'post' in css})]
+    remove_tags = [dict(name='span', attrs={'class': ['post-author', 'post-category', 'small']})]
+
+    # Plain markup (like Eclipse's) works best for most e-readers: ignore
+    # the site's styling rules, but center illustrations.
+    no_stylesheets = True
+    auto_cleanup = False
+    remove_attributes = ['style', 'align']
+    extra_css = '.wp-caption {text-align: center;} .wp-caption-text {font-size: small; font-style: italic;}'
+
+    # Where Calibre looks for article links; the posts found here are
+    # retrieved and formatted into an ebook according to the rules above.
+    feeds = ['http://www.nightshadebooks.com/category/eclipse/feed/']
--- a/recipes/ekologia_pl.recipe
+++ b/recipes/ekologia_pl.recipe
@ -9,7 +9,7 @@ class EkologiaPl(BasicNewsRecipe):
    language       = 'pl'
    cover_url = 'http://www.ekologia.pl/assets/images/logo/ekologia_pl_223x69.png'
    ignore_duplicate_articles = {'title', 'url'}
-    extra_css = '.title {font-size: 200%;}'
+    extra_css = '.title {font-size: 200%;} .imagePowiazane, .imgCon {float:left; margin-right:5px;}'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
--- a/recipes/el_diplo.recipe
+++ b/recipes/el_diplo.recipe
@ -26,7 +26,7 @@ class ElDiplo_Recipe(BasicNewsRecipe):
    title = u'El Diplo'
    __author__ = 'Tomas Di Domenico'
    description = 'Publicacion mensual de Le Monde Diplomatique, edicion Argentina'
-    langauge = 'es_AR'
+    language = 'es_AR'
    needs_subscription = True
    auto_cleanup = True

--- a/recipes/equipped.recipe
+++ b/recipes/equipped.recipe
@ -0,0 +1,29 @@
+#!/usr/bin/env  python
+
+__license__ = 'GPL v3'
+__author__ = 'teepel <teepel44@gmail.com>, Artur Stachecki <artur.stachecki@gmail.com>'
+
+'''
+equipped.pl
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+class equipped(BasicNewsRecipe):
+    title = u'Equipped'
+    description = u'Wiadomości z equipped.pl'
+    __author__ = 'teepel <teepel44@gmail.com>'
+    language = 'pl'
+    INDEX = 'http://equipped.pl'
+    feeds = [(u'Equipped', u'http://feeds.feedburner.com/Equippedpl?format=xml')]
+    oldest_article = 7
+    max_articles_per_feed = 100
+    simultaneous_downloads = 5
+    use_embedded_content = False
+    remove_empty_feeds = True
+    remove_javascript = True
+    no_stylesheets = True
+    extra_css = '.alignleft {float:left; margin-right:5px;}'
+    # The tag filters below are disabled, so none are configured in this class.
+    #keep_only_tags = [dict(name='article')]
+    #remove_tags = [dict(id='disqus_thread')]
+    #remove_tags_after = [dict(id='disqus_thread')]
--- a/recipes/esensja_(rss).recipe
+++ b/recipes/esensja_(rss).recipe
@ -12,12 +12,6 @@ class EsensjaRSS(BasicNewsRecipe):
    language       = 'pl'
    encoding = 'utf-8'
    INDEX = 'http://www.esensja.pl'
-    extra_css = '''.t-title {font-size: x-large; font-weight: bold; text-align: left}
-                    .t-author {font-size: x-small; text-align: left}
-                    .t-title2 {font-size: x-small; font-style: italic; text-align: left}
-                    .text {font-size: small; text-align: left}
-                    .annot-ref {font-style: italic; text-align: left}
-                    '''
    cover_url = ''
    masthead_url = 'http://esensja.pl/img/wrss.gif'
    use_embedded_content = False
--- a/recipes/film_org_pl.recipe
+++ b/recipes/film_org_pl.recipe
@ -1,19 +1,54 @@
 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
 from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import Comment
 import re
 class FilmOrgPl(BasicNewsRecipe):
-    title          = u'Film.org.pl'
-    __author__        = 'fenuks'
-    description   = u"Recenzje, analizy, artykuły, rankingi - wszystko o filmie dla miłośników kina. Opisy efektów specjalnych, wersji reżyserskich, remake'ów, sequeli. No i forum filmowe. Jedne z największych w Polsce."
-    category       = 'film'
-    language       = 'pl'
+    title = u'Film.org.pl'
+    __author__ = 'fenuks'
+    description = u"Recenzje, analizy, artykuły, rankingi - wszystko o filmie dla miłośników kina. Opisy efektów specjalnych, wersji reżyserskich, remake'ów, sequeli. No i forum filmowe. Jedne z największych w Polsce."
+    category = 'film'
+    language = 'pl'
+    extra_css = '.alignright {float:right; margin-left:5px;} .alignleft {float:left; margin-right:5px;} .recenzja-title {font-size: 150%; margin-top: 5px; margin-bottom: 5px;}'
    cover_url = 'http://film.org.pl/wp-content/themes/KMF/images/logo_kmf10.png'
    ignore_duplicate_articles = {'title', 'url'}
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
+    remove_javascript = True
    remove_empty_feeds = True
-    use_embedded_content = True
-    preprocess_regexps     = [(re.compile(ur'<h3>Przeczytaj także:</h3>.*', re.IGNORECASE|re.DOTALL), lambda m: '</body>'), (re.compile(ur'<div>Artykuł</div>', re.IGNORECASE), lambda m: ''), (re.compile(ur'<div>Ludzie filmu</div>', re.IGNORECASE), lambda m: '')]
-    remove_tags = [dict(name='img', attrs={'alt':['Ludzie filmu', u'Artykuł']})]
-    feeds          = [(u'Recenzje', u'http://film.org.pl/r/recenzje/feed/'), (u'Artyku\u0142', u'http://film.org.pl/a/artykul/feed/'), (u'Analiza', u'http://film.org.pl/a/analiza/feed/'), (u'Ranking', u'http://film.org.pl/a/ranking/feed/'), (u'Blog', u'http://film.org.pl/kmf/blog/feed/'), (u'Ludzie', u'http://film.org.pl/a/ludzie/feed/'), (u'Seriale', u'http://film.org.pl/a/seriale/feed/'), (u'Oceanarium', u'http://film.org.pl/a/ocenarium/feed/'), (u'VHS', u'http://film.org.pl/a/vhs-a/feed/')]
+    use_embedded_content = False
+    remove_attributes = ['style']
+    preprocess_regexps = [(re.compile(ur'<h3>Przeczytaj także:</h3>.*', re.IGNORECASE|re.DOTALL), lambda m: '</body>'), (re.compile(ur'</?center>', re.IGNORECASE|re.DOTALL), lambda m: ''), (re.compile(ur'<div>Artykuł</div>', re.IGNORECASE), lambda m: ''), (re.compile(ur'<div>Ludzie filmu</div>', re.IGNORECASE), lambda m: ''), (re.compile(ur'(<br ?/?>\s*?){2,}', re.IGNORECASE|re.DOTALL), lambda m: '')]
+    keep_only_tags = [dict(name=['h11', 'h16', 'h17']), dict(attrs={'class':'editor'})]
+    remove_tags_after = dict(id='comments')
+    remove_tags = [dict(name=['link', 'meta', 'style']), dict(name='img', attrs={'alt':['Ludzie filmu', u'Artykuł']}), dict(id='comments'), dict(attrs={'style':'border: 0pt none ; margin: 0pt; padding: 0pt;'}), dict(name='p', attrs={'class':'rating'}), dict(attrs={'layout':'button_count'})]
+    feeds = [(u'Recenzje', u'http://film.org.pl/r/recenzje/feed/'), (u'Artyku\u0142', u'http://film.org.pl/a/artykul/feed/'), (u'Analiza', u'http://film.org.pl/a/analiza/feed/'), (u'Ranking', u'http://film.org.pl/a/ranking/feed/'), (u'Blog', u'http://film.org.pl/kmf/blog/feed/'), (u'Ludzie', u'http://film.org.pl/a/ludzie/feed/'), (u'Seriale', u'http://film.org.pl/a/seriale/feed/'), (u'Oceanarium', u'http://film.org.pl/a/ocenarium/feed/'), (u'VHS', u'http://film.org.pl/a/vhs-a/feed/')]
+
+    def append_page(self, soup, appendtag):
+        '''Append the 'editor' block of every page linked from the
+        'pagelink' div to appendtag, then strip pager/comment leftovers.'''
+        tag = soup.find('div', attrs={'class': 'pagelink'})
+        if not tag:
+            return
+        for nexturl in tag.findAll('a', href=True):
+            soup2 = self.index_to_soup(nexturl['href'])
+            pagetext = soup2.find(attrs={'class': 'editor'})
+            if pagetext is None:
+                # find() returns None when the page has no 'editor' block.
+                continue
+            for comment in pagetext.findAll(text=lambda text: isinstance(text, Comment)):
+                comment.extract()
+            appendtag.insert(len(appendtag.contents), pagetext)
+        for spec in ({'class': 'pagelink'}, {'id': 'comments'},
+                     {'style': 'border: 0pt none ; margin: 0pt; padding: 0pt;'},
+                     {'layout': 'button_count'}):
+            for r in appendtag.findAll(attrs=spec):
+                r.extract()
+                
+    def preprocess_html(self, soup):
+        for heading in soup.findAll('h11'):
+            heading.name = 'h1'  # non-standard <h11> becomes a real heading
+        self.append_page(soup, soup.body)  # pull linked pages into <body>
+        for line_break in soup.findAll('br'):
+            line_break.extract()
+        return soup
--- a/recipes/film_web.recipe
+++ b/recipes/film_web.recipe
@ -10,7 +10,6 @@ class FilmWebPl(BasicNewsRecipe):
    category       = 'movies'
    language       = 'pl'
    index = 'http://www.filmweb.pl'
-    #extra_css = '.MarkupPhotoHTML-7 {float:left; margin-right: 10px;}'
    oldest_article = 8
    max_articles_per_feed = 100
    no_stylesheets = True
@ -19,9 +18,9 @@ class FilmWebPl(BasicNewsRecipe):
    remove_javascript = True
    preprocess_regexps = [(re.compile(u'\(kliknij\,\ aby powiększyć\)', re.IGNORECASE), lambda m: ''), (re.compile(ur'(<br ?/?>\s*?<br ?/?>\s*?)+', re.IGNORECASE), lambda m: '<br />')]#(re.compile(ur' | ', re.IGNORECASE), lambda m: '')]
    extra_css      = '.hdrBig {font-size:22px;} ul {list-style-type:none; padding: 0; margin: 0;}'
-    remove_tags = [dict(name='div', attrs={'class':['recommendOthers']}), dict(name='ul', attrs={'class':'fontSizeSet'}), dict(attrs={'class':'userSurname anno'})]
+    #remove_tags = [dict()]
    remove_attributes = ['style',]
-    keep_only_tags = [dict(name='h1', attrs={'class':['hdrBig', 'hdrEntity']}), dict(name='div', attrs={'class':['newsInfo', 'newsInfoSmall', 'reviewContent description']})]
+    keep_only_tags = [dict(attrs={'class':['hdr hdr-super', 'newsContent']})]
    feeds          = [(u'News / Filmy w produkcji', 'http://www.filmweb.pl/feed/news/category/filminproduction'),
                         (u'News / Festiwale, nagrody i przeglądy', u'http://www.filmweb.pl/feed/news/category/festival'),
                         (u'News / Seriale', u'http://www.filmweb.pl/feed/news/category/serials'),
@ -44,12 +43,12 @@ class FilmWebPl(BasicNewsRecipe):
        skip_tag = soup.find('a', attrs={'class':'welcomeScreenButton'})
        if skip_tag is not None:
            return self.index_to_soup(skip_tag['href'], raw=True)
-            
+
    def postprocess_html(self, soup, first_fetch):
        for r in soup.findAll(attrs={'class':'singlephoto'}):
            r['style'] = 'float:left; margin-right: 10px;'
        return soup
-        
+
    def preprocess_html(self, soup):
        for a in soup('a'):
            if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
@ -59,11 +58,6 @@ class FilmWebPl(BasicNewsRecipe):
        for i in soup.findAll('sup'):
            if not i.string or i.string.startswith('(kliknij'):
                i.extract()
-        tag = soup.find(name='ul', attrs={'class':'inline sep-line'})
-        if tag:
-            tag.name = 'div'
-            for t in tag.findAll('li'):
-                t.name = 'div'
        for r in soup.findAll(id=re.compile('photo-\d+')):
            r.extract()
        for r in soup.findAll(style=re.compile('float: ?left')):
--- a/recipes/financial_times_uk.recipe
+++ b/recipes/financial_times_uk.recipe
@ -8,6 +8,7 @@ import datetime
 from calibre.ptempfile import PersistentTemporaryFile
 from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe
+from collections import OrderedDict

 class FinancialTimes(BasicNewsRecipe):
    title                 = 'Financial Times (UK)'
@ -93,7 +94,7 @@ class FinancialTimes(BasicNewsRecipe):
            try:
                urlverified = self.browser.open_novisit(url).geturl() # resolve redirect.
            except:
-                continue 
+                continue
            title = self.tag_to_string(item)
            date = strftime(self.timefmt)
            articles.append({
@ -105,29 +106,28 @@ class FinancialTimes(BasicNewsRecipe):
        return articles

    def parse_index(self):
+        '''Return [(section title, [article dict, ...]), ...] scraped from self.INDEX.'''
-        feeds = []
+        feeds = OrderedDict()
        soup = self.index_to_soup(self.INDEX)
-        dates= self.tag_to_string(soup.find('div', attrs={'class':'btm-links'}).find('div'))
-        self.timefmt = ' [%s]'%dates
-        wide = soup.find('div',attrs={'class':'wide'})
-        if not wide:
-           return feeds
-        allsections = wide.findAll(attrs={'class':lambda x: x and 'footwell' in x.split()})
-        if not allsections:
-           return feeds
-        count = 0
-        for item in allsections:
-            count = count + 1
-            if self.test and count > 2:
-               return feeds
-            fitem = item.h3
-            if not fitem:
-               fitem = item.h4
-            ftitle = self.tag_to_string(fitem)   
-            self.report_progress(0, _('Fetching feed')+' %s...'%(ftitle))
-            feedarts = self.get_artlinks(item.ul)
-            feeds.append((ftitle,feedarts))
-        return feeds
+        # Issue-date lookup is disabled; self.timefmt is not modified here.
+        #dates= self.tag_to_string(soup.find('div', attrs={'class':'btm-links'}).find('div'))
+        #self.timefmt = ' [%s]'%dates
+
+        # Sections are the 'feedBox' divs of each 'feedBoxes clearfix' column;
+        # boxes sharing an <h4> title are merged, in first-seen order.
+        for column in soup.findAll('div', attrs={'class': 'feedBoxes clearfix'}):
+            for section in column.findAll('div', attrs={'class': 'feedBox'}):
+                section_title = self.tag_to_string(section.find('h4'))
+                items = section.ul.findAll('li') if section.ul is not None else []
+                for article in items:
+                    link = article.a
+                    if link is None or not link.get('href'):
+                        continue  # <li> without a usable link; nothing to fetch
+                    feeds.setdefault(section_title, []).append({
+                        'title': self.tag_to_string(link), 'url': link['href'],
+                        'description': '', 'date': ''})
+
+        return list(feeds.items())

    def preprocess_html(self, soup):
        items = ['promo-box','promo-title',
@ -174,9 +174,6 @@ class FinancialTimes(BasicNewsRecipe):
            count += 1
        tfile = PersistentTemporaryFile('_fa.html')
        tfile.write(html)
-        tfile.close()        
+        tfile.close()
        self.temp_files.append(tfile)
        return tfile.name
-
-    def cleanup(self):
-        self.browser.open('https://registration.ft.com/registration/login/logout?location=')
--- a/recipes/focus_pl.recipe
+++ b/recipes/focus_pl.recipe
@ -1,12 +1,12 @@
+#!/usr/bin/env python
+__license__ = 'GPL v3'
+
 import re
-
 from calibre.web.feeds.news import BasicNewsRecipe

-
 class FocusRecipe(BasicNewsRecipe):

-    __license__ = 'GPL v3'
-    __author__ = u'intromatyk <intromatyk@gmail.com>'
+    __author__ = u'Artur Stachecki <artur.stachecki@gmail.com>'
    language = 'pl'
    version = 1

--- a/recipes/forbes_pl.recipe
+++ b/recipes/forbes_pl.recipe
@ -0,0 +1,53 @@
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+
+from calibre.web.feeds.news import BasicNewsRecipe
+import datetime
+import re
+
+class forbes_pl(BasicNewsRecipe):
+    # Single-feed recipe; the bare string literal closing the class holds disabled pagination code.
+    title = u'Forbes.pl'
+    description = u'Biznes, finanse, gospodarka, strategie, wiadomości gospodarcze, analizy finasowe i strategiczne.'
+    __author__ = 'Artur Stachecki <artur.stachecki@gmail.com>'
+    language = 'pl'
+    index = 'http://www.forbes.pl'
+    cover_url = 'http://www.forbes.pl/resources/front/images/logo.png'
+    feeds = [(u'Wszystkie', 'http://www.forbes.pl/rss')]
+    oldest_article = 1
+    max_articles_per_feed = 100
+    pages_count = 4
+    now = datetime.datetime.now()
+    yesterday = (now - datetime.timedelta(hours=24)).strftime("%d.%m.%Y %H:%M:%S")
+    remove_javascript = True
+    no_stylesheets = True
+    extra_css = '.Block-Photo {float:left; max-width: 300px; margin-right: 5px;}'
+    keep_only_tags = [dict(attrs={'class':['Block-Node Content-Article ', 'Block-Node Content-Article piano-closed']})]
+    remove_tags = [dict(attrs={'class':['Keywords Styled', 'twitter-share-button', 'Block-List-Related Block-List']})]
+    preprocess_regexps = [
+        (re.compile(ur'<p>(<strong>)?(Czytaj|Zobacz) (też|także):.*?</p>', re.DOTALL), lambda match: ''),
+        (re.compile(ur'<strong>Zobacz:.*?</strong>', re.DOTALL), lambda match: '')]
+    '''def preprocess_html(self, soup):
+        self.append_page(soup, soup.body)
+        return soup
+
+
+    def append_page(self, soup, appendtag):
+        cleanup = False
+        nexturl = appendtag.find('a', attrs={'class':'next'})
+        if nexturl:
+            cleanup = True
+        while nexturl:
+            soup2 = self.index_to_soup(self.index + nexturl['href'])
+            nexturl = soup2.find('a', attrs={'class':'next'})
+            pagetext = soup2.findAll(id='article-body-wrapper')
+            if not pagetext:
+                pagetext = soup2.findAll(attrs={'class':'Article-Entry Styled'})
+            for comment in pagetext.findAll(text=lambda text:isinstance(text, Comment)):
+                comment.extract()
+            pos = len(appendtag.contents)
+            appendtag.insert(pos, pagetext)
+        if cleanup:
+            for r in appendtag.findAll(attrs={'class':'paginator'}):
+                r.extract()'''
--- a/recipes/foreignaffairs.recipe
+++ b/recipes/foreignaffairs.recipe
@ -1,6 +1,5 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 import re
-from calibre.ptempfile import PersistentTemporaryFile

 class ForeignAffairsRecipe(BasicNewsRecipe):
    ''' there are three modifications:
@ -45,7 +44,6 @@ class ForeignAffairsRecipe(BasicNewsRecipe):
                          'publisher': publisher}

    temp_files = []
-    articles_are_obfuscated = True

    def get_cover_url(self):
        soup = self.index_to_soup(self.FRONTPAGE)
@ -53,20 +51,6 @@ class ForeignAffairsRecipe(BasicNewsRecipe):
        img_url =  div.find('img')['src']
        return self.INDEX + img_url

-    def get_obfuscated_article(self, url):
-        br = self.get_browser()
-        br.open(url)
-
-        response = br.follow_link(url_regex = r'/print/[0-9]+', nr = 0)
-        html = response.read()
-
-        self.temp_files.append(PersistentTemporaryFile('_fa.html'))
-        self.temp_files[-1].write(html)
-        self.temp_files[-1].close()
-
-        return self.temp_files[-1].name
-
-
    def parse_index(self):

        answer = []
@ -89,10 +73,10 @@ class ForeignAffairsRecipe(BasicNewsRecipe):
                        if div.find('a') is not None:
                            originalauthor=self.tag_to_string(div.findNext('div', attrs = {'class':'views-field-field-article-book-nid'}).div.a)
                            title=subsectiontitle+': '+self.tag_to_string(div.span.a)+' by '+originalauthor
-                            url=self.INDEX+div.span.a['href']
+                            url=self.INDEX+self.index_to_soup(self.INDEX+div.span.a['href']).find('a', attrs={'class':'fa_addthis_print'})['href']
                            atr=div.findNext('div', attrs = {'class': 'views-field-field-article-display-authors-value'})
                            if atr is not None:
-                                author=self.tag_to_string(atr.span.a)
+                                author=self.tag_to_string(atr.span)
                            else:
                                author=''
                            desc=div.findNext('span', attrs = {'class': 'views-field-field-article-summary-value'})
@ -106,10 +90,10 @@ class ForeignAffairsRecipe(BasicNewsRecipe):
                for div in sec.findAll('div', attrs = {'class': 'views-field-title'}):
                    if div.find('a') is not None:
                        title=self.tag_to_string(div.span.a)
-                        url=self.INDEX+div.span.a['href']
+                        url=self.INDEX+self.index_to_soup(self.INDEX+div.span.a['href']).find('a', attrs={'class':'fa_addthis_print'})['href']
                        atr=div.findNext('div', attrs = {'class': 'views-field-field-article-display-authors-value'})
                        if atr is not None:
-                            author=self.tag_to_string(atr.span.a)
+                            author=self.tag_to_string(atr.span)
                        else:
                            author=''
                        desc=div.findNext('span', attrs = {'class': 'views-field-field-article-summary-value'})
@ -119,7 +103,7 @@ class ForeignAffairsRecipe(BasicNewsRecipe):
                            description=''
                        articles.append({'title':title, 'date':None, 'url':url, 'description':description, 'author':author})
            if articles:
-                    answer.append((section, articles))
+                answer.append((section, articles))
        return answer

    def preprocess_html(self, soup):
--- a/recipes/fortune_magazine.recipe
+++ b/recipes/fortune_magazine.recipe
@ -0,0 +1,75 @@
+from calibre.web.feeds.recipes import BasicNewsRecipe
+from collections import OrderedDict
+
+class Fortune(BasicNewsRecipe):
+    '''Download the latest Fortune Magazine issue from money.cnn.com (login required).'''
+
+    title       = 'Fortune Magazine'
+    __author__  = 'Rick Shang'
+    description = 'FORTUNE is a global business magazine that has been revered in its content and credibility since 1930. FORTUNE covers the entire field of business, including specific companies and business trends, prominent business leaders, and new ideas shaping the global marketplace.'
+    language = 'en'
+    category = 'news'
+    encoding = 'UTF-8'
+    keep_only_tags = [dict(attrs={'id':['storycontent']})]
+    remove_tags = [dict(attrs={'class':['hed_side','socialMediaToolbarContainer']})]
+    # BasicNewsRecipe's switch is remove_javascript; the former
+    # 'no_javascript' is not a recognised attribute and was ignored.
+    remove_javascript = True
+    no_stylesheets = True
+    needs_subscription = True
+
+    def get_browser(self):
+        '''Return a browser that has submitted the paywall login form.'''
+        br = BasicNewsRecipe.get_browser(self)
+        # This article page is opened only to reach the 'paywall-form' form.
+        br.open('http://money.cnn.com/2013/03/21/smallbusiness/legal-marijuana-startups.pr.fortune/index.html')
+        br.select_form(name="paywall-form")
+        br['email'] = self.username
+        br['password'] = self.password
+        br.submit()
+        return br
+
+    def parse_index(self):
+        '''Return [(section, [article dict, ...]), ...] for the latest issue.
+
+        Also sets self.cover_url and self.timefmt from the issue page.
+        '''
+        soup0 = self.index_to_soup('http://money.cnn.com/magazines/fortune/')
+        # Follow the "latestissue" link to the issue's table of contents.
+        latest = soup0.find('div', attrs={'class':'latestissue'}).find('a', href=True)
+        soup = self.index_to_soup(latest['href'])
+
+        # Cover image and issue date.
+        cover_item = soup.find('div', attrs={'id':'cover-story'})
+        self.cover_url = cover_item.find('img', src=True)['src']
+        date = self.tag_to_string(cover_item.find('div', attrs={'class':'tocDate'})).strip()
+        self.timefmt = u' [%s]'%date
+
+        feeds = OrderedDict()
+        # The cover story gets a section of its own.
+        coverstory = soup.find('div', attrs={'class':'cnnHeadline'})
+        desc = self.tag_to_string(coverstory.findNext('p', attrs={'class':'cnnBlurbTxt'}))
+        feeds['Cover Story'] = [{'title': self.tag_to_string(coverstory.a).strip(),
+                                 'url': coverstory.a['href'], 'description': desc, 'date': ''}]
+
+        # Each "cnnheader" div titles a section; its articles are the <li>
+        # items of the next <ul> in the document.
+        for post in soup.findAll('div', attrs={'class':'cnnheader'}):
+            section_title = self.tag_to_string(post).strip()
+            ul = post.findNext('ul')
+            if ul is None:
+                continue
+            articles = []
+            for link in ul.findAll('li'):
+                links = link.find('h2')
+                if links is None or links.a is None:
+                    continue  # <li> without an <h2><a> headline; not an article
+                title = self.tag_to_string(links.a).strip()
+                url = links.a['href']
+                desc = self.tag_to_string(link.find('p', attrs={'class':'cnnBlurbTxt'}))
+                articles.append({'title':title, 'url':url, 'description':desc, 'date':''})
+            if articles:
+                feeds.setdefault(section_title, []).extend(articles)
+
+        return list(feeds.items())
+
--- a/recipes/galaxys_edge.recipe
+++ b/recipes/galaxys_edge.recipe
@ -0,0 +1,108 @@
+from __future__ import with_statement
+__license__ = 'GPL 3'
+__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class GalaxyEdge(BasicNewsRecipe):
+    # Recipe whose feed list is scraped from the table of contents on the
+    # site's home page (see parse_index) rather than taken from RSS feeds.
+    title                 = u'The Galaxy\'s Edge'
+    language = 'en'
+
+    oldest_article        = 7
+    __author__            = 'Krittika Goyal'
+    no_stylesheets = True
+
+    auto_cleanup = True
+
+    #keep_only_tags = [dict(id='content')]
+    #remove_tags = [dict(attrs={'class':['article-links', 'breadcr']}),
+            #dict(id=['email-section', 'right-column', 'printfooter', 'topover',
+                     #'slidebox', 'th_footer'])]
+
+    extra_css = '.photo-caption { font-size: smaller }'
+
+    def parse_index(self):
+        '''
+        Scrape the table of contents on the home page into a feed list.
+
+        The TOC is read from the <p> elements inside the width=225 cell of
+        the width=911 table. The first four paragraphs are ignored. The
+        fifth becomes a one-article section named after its first link.
+        Every later paragraph becomes a section titled by its <b> element
+        and containing all of its links.
+
+        Returns a list of (section title, list of article dicts) tuples.
+        Sections with an empty title or without any usable link are omitted.
+        '''
+        base = 'http://www.galaxysedge.com/'
+        soup = self.index_to_soup(base)
+        main = soup.find('table', attrs={'width':'911'})
+        toc = main.find('td', attrs={'width':'225'})
+
+        feeds = []
+        for position, para in enumerate(toc.findAll(['p']), 1):
+            if position < 5:
+                continue
+
+            if position == 5:
+                # This paragraph is a single link that is both the section
+                # title and its only article
+                first_link = para.find('a')
+                if first_link is None:
+                    continue
+                links = [first_link]
+                current_section = self.tag_to_string(first_link)
+            else:
+                links = para.findAll('a')
+                current_section = self.tag_to_string(para.find('b'))
+            self.log('\tFound section:', current_section)
+
+            current_articles = []
+            for link in links:
+                title = self.tag_to_string(link)
+                # Check the href *before* prefixing it: a missing href must
+                # skip the link, not produce a bogus URL or a TypeError
+                href = link.get('href')
+                if not href or not title:
+                    continue
+                url = base + href
+                self.log('\t\tFound article:', title)
+                self.log('\t\t\t', url)
+                current_articles.append({'title': title, 'url':url,
+                    'description':'', 'date':''})
+
+            if current_articles and current_section:
+                feeds.append((current_section, current_articles))
+
+        return feeds
+
+
+
+
+    #def preprocess_raw_html(self, raw, url):
+        #return raw.replace('<body><p>', '<p>').replace('</p></body>', '</p>')
+
+    #def postprocess_html(self, soup, first_fetch):
+        #for t in soup.findAll(['table', 'tr', 'td','center']):
+            #t.name = 'div'
+        #return soup
+
+    #def parse_index(self):
+        #today = time.strftime('%Y-%m-%d')
+        #soup = self.index_to_soup(
+                #'http://www.thehindu.com/todays-paper/tp-index/?date=' + today)
+        #div = soup.find(id='left-column')
+        #feeds = []
+        #current_section = None
+        #current_articles = []
+        #for x in div.findAll(['h3', 'div']):
+            #if current_section and x.get('class', '') == 'tpaper':
+                #a = x.find('a', href=True)
+                #if a is not None:
+                    #current_articles.append({'url':a['href']+'?css=print',
+                        #'title':self.tag_to_string(a), 'date': '',
+                        #'description':''})
+            #if x.name == 'h3':
+                #if current_section and current_articles:
+                    #feeds.append((current_section, current_articles))
+                #current_section = self.tag_to_string(x)
+                #current_articles = []
+        #return feeds
+
+
--- a/recipes/gazeta-prawna-calibre-v1.recipe
+++ b/recipes/gazeta-prawna-calibre-v1.recipe
@ -14,13 +14,14 @@ class gazetaprawna(BasicNewsRecipe):
    title          = u'Gazeta Prawna'
    __author__ = u'Vroo'
    publisher      = u'Infor Biznes'
-    oldest_article = 7
+    oldest_article = 1
    max_articles_per_feed = 20
    no_stylesheets = True
    remove_javascript = True
    description = 'Polski dziennik gospodarczy'
    language = 'pl'
    encoding = 'utf-8'
+    ignore_duplicate_articles = {'title', 'url'}

    remove_tags_after = [
               dict(name='div', attrs={'class':['data-art']})
@ -30,7 +31,7 @@ class gazetaprawna(BasicNewsRecipe):
    ]

    feeds = [
-      (u'Wiadomo\u015bci - najwa\u017cniejsze', u'http://www.gazetaprawna.pl/wiadomosci/najwazniejsze/rss.xml'),
+      (u'Z ostatniej chwili', u'http://rss.gazetaprawna.pl/GazetaPrawna'),
      (u'Biznes i prawo gospodarcze', u'http://biznes.gazetaprawna.pl/rss.xml'),
      (u'Prawo i wymiar sprawiedliwo\u015bci', u'http://prawo.gazetaprawna.pl/rss.xml'),
      (u'Praca i ubezpieczenia', u'http://praca.gazetaprawna.pl/rss.xml'),
@ -51,3 +52,8 @@ class gazetaprawna(BasicNewsRecipe):
        url = url.replace('prawo.gazetaprawna', 'www.gazetaprawna')
        url = url.replace('praca.gazetaprawna', 'www.gazetaprawna')
        return url
+
+    def get_cover_url(self):
+        '''
+        Return the cover image URL scraped from the egazety.pl issue page.
+
+        The URL is the href of the link inside the <p class="covr"> element.
+        When that element or link is not present, the value already stored
+        in cover_url (if any) is returned unchanged.
+        '''
+        soup = self.index_to_soup('http://www.egazety.pl/infor/e-wydanie-dziennik-gazeta-prawna.html')
+        cover = soup.find('p', attrs={'class':'covr'})
+        # Only overwrite cover_url when the page really provides a link, so a
+        # layout change does not abort the download with an AttributeError
+        if cover is not None and cover.a is not None and cover.a.get('href'):
+            self.cover_url = cover.a['href']
+        return getattr(self, 'cover_url', None)
--- a/recipes/gazeta_pl_krakow.recipe
+++ b/recipes/gazeta_pl_krakow.recipe
@ -10,7 +10,7 @@ krakow.gazeta.pl
 from calibre.web.feeds.news import BasicNewsRecipe

 class gw_krakow(BasicNewsRecipe):
-    title          = u'Gazeta.pl Kraków'
+    title          = u'Gazeta Wyborcza Kraków'
    __author__ = 'teepel <teepel44@gmail.com> based on GW from fenuks'
    language       = 'pl'
    description =u'Wiadomości z Krakowa na portalu Gazeta.pl.'
--- a/recipes/gazeta_pl_szczecin.recipe
+++ b/recipes/gazeta_pl_szczecin.recipe
@ -5,7 +5,7 @@ import string
 from calibre.web.feeds.news import BasicNewsRecipe

 class GazetaPlSzczecin(BasicNewsRecipe):
-    title          = u'Gazeta.pl Szczecin'
+    title          = u'Gazeta Wyborcza Szczecin'
    description    = u'Wiadomości ze Szczecina na portalu Gazeta.pl.'
    __author__     = u'Michał Szkutnik'
    __license__    = u'GPL v3'
--- a/recipes/gazeta_pl_warszawa.recipe
+++ b/recipes/gazeta_pl_warszawa.recipe
@ -10,7 +10,7 @@ warszawa.gazeta.pl
 from calibre.web.feeds.news import BasicNewsRecipe

 class gw_wawa(BasicNewsRecipe):
-    title          = u'Gazeta.pl Warszawa'
+    title          = u'Gazeta Wyborcza Warszawa'
    __author__ = 'teepel <teepel44@gmail.com> based on GW from fenuks'
    language       = 'pl'
    description ='Wiadomości z Warszawy na portalu Gazeta.pl.'
--- a/recipes/gazeta_wyborcza.recipe
+++ b/recipes/gazeta_wyborcza.recipe
@ -3,7 +3,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.ebooks.BeautifulSoup import Comment

 class Gazeta_Wyborcza(BasicNewsRecipe):
-    title = u'Gazeta.pl'
+    title = u'Gazeta Wyborcza'
    __author__ = 'fenuks, Artur Stachecki'
    language = 'pl'
    description = 'Wiadomości z Polski i ze świata. Serwisy tematyczne i lokalne w 20 miastach.'
--- a/recipes/gofin_pl.recipe
+++ b/recipes/gofin_pl.recipe
@ -0,0 +1,26 @@
+#!/usr/bin/env  python
+
+__license__ = 'GPL v3'
+__author__ = 'teepel <teepel44@gmail.com>'
+
+'''
+gofin.pl
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class gofin(BasicNewsRecipe):
+    # Recipe metadata
+    title = u'Gofin'
+    __author__ = 'teepel <teepel44@gmail.com>'
+    description = u'Portal Podatkowo-Księgowy'
+    language = 'pl'
+    INDEX = 'http://gofin.pl'
+
+    # Download settings
+    oldest_article = 7
+    max_articles_per_feed = 100
+    simultaneous_downloads = 5
+    remove_empty_feeds = True
+
+    # Article clean-up settings
+    remove_javascript = True
+    no_stylesheets = True
+    auto_cleanup = True
+
+    # (feed title, feed URL) pairs, one per site section
+    feeds = [
+        (u'Podatki', u'http://www.rss.gofin.pl/podatki.xml'),
+        (u'Prawo Pracy', u'http://www.rss.gofin.pl/prawopracy.xml'),
+        (u'Rachunkowo\u015b\u0107', u'http://www.rss.gofin.pl/rachunkowosc.xml'),
+        (u'Sk\u0142adki, zasi\u0142ki, emerytury', u'http://www.rss.gofin.pl/zasilki.xml'),
+        (u'Firma', u'http://www.rss.gofin.pl/firma.xml'),
+        (u'Prawnik radzi', u'http://www.rss.gofin.pl/prawnikradzi.xml'),
+    ]
--- a/recipes/gram_pl.recipe
+++ b/recipes/gram_pl.recipe
@ -2,22 +2,22 @@ from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.ebooks.BeautifulSoup import BeautifulSoup

 class Gram_pl(BasicNewsRecipe):
-    title          = u'Gram.pl'
-    __author__        = 'fenuks'
-    description   = u'Serwis społecznościowy o grach: recenzje, newsy, zapowiedzi, encyklopedia gier, forum. Gry PC, PS3, X360, PS Vita, sprzęt dla graczy.'
-    category       = 'games'
-    language       = 'pl'
+    title = u'Gram.pl'
+    __author__ = 'fenuks'
+    description = u'Serwis społecznościowy o grach: recenzje, newsy, zapowiedzi, encyklopedia gier, forum. Gry PC, PS3, X360, PS Vita, sprzęt dla graczy.'
+    category = 'games'
+    language = 'pl'
    oldest_article = 8
    index='http://www.gram.pl'
    max_articles_per_feed = 100
    ignore_duplicate_articles = {'title', 'url'}
    no_stylesheets= True
    remove_empty_feeds = True
-    #extra_css = 'h2 {font-style: italic;  font-size:20px;} .picbox div {float: left;}'
+    #extra_css = 'h2 {font-style: italic; font-size:20px;} .picbox div {float: left;}'
    cover_url=u'http://www.gram.pl/www/01/img/grampl_zima.png'
    keep_only_tags= [dict(id='articleModule')]
-    remove_tags = [dict(attrs={'class':['breadCrump', 'dymek', 'articleFooter', 'twitter-share-button']})]
-    feeds          = [(u'Informacje', u'http://www.gram.pl/feed_news.asp'),
+    remove_tags = [dict(attrs={'class':['breadCrump', 'dymek', 'articleFooter', 'twitter-share-button']}), dict(name='aside')]
+    feeds = [(u'Informacje', u'http://www.gram.pl/feed_news.asp'),
                        (u'Publikacje', u'http://www.gram.pl/feed_news.asp?type=articles')
                        ]

@ -46,4 +46,4 @@ class Gram_pl(BasicNewsRecipe):
        tag=soup.find(name='span', attrs={'class':'platforma'})
        if tag:
           tag.name = 'p'
-        return soup
+        return soup
--- a/recipes/harpers_full.recipe
+++ b/recipes/harpers_full.recipe
@ -77,10 +77,9 @@ class Harpers_full(BasicNewsRecipe):
        self.timefmt =  u' [%s]'%date

        #get cover
-        coverurl='http://harpers.org/wp-content/themes/harpers/ajax_microfiche.php?img=harpers-'+re.split('harpers.org/',currentIssue_url)[1]+'gif/0001.gif'
-        soup2 = self.index_to_soup(coverurl)
-        self.cover_url = self.tag_to_string(soup2.find('img')['src'])
+        self.cover_url = soup1.find('div', attrs = {'class':'picture_hp'}).find('img', src=True)['src']         
        self.log(self.cover_url)
+        
        articles = []
        count = 0
        for item in soup1.findAll('div', attrs={'class':'articleData'}):
--- a/recipes/hbr.recipe
+++ b/recipes/hbr.recipe
@ -1,6 +1,4 @@
 from calibre.web.feeds.news import BasicNewsRecipe
-import re
-from datetime import date, timedelta

 class HBR(BasicNewsRecipe):

@ -11,23 +9,18 @@ class HBR(BasicNewsRecipe):
    timefmt                = ' [%B %Y]'
    language = 'en'
    no_stylesheets = True
-    # recipe_disabled = ('hbr.org has started requiring the use of javascript'
-    #         ' to log into their website. This is unsupported in calibre, so'
-    #         ' this recipe has been disabled. If you would like to see '
-    #         ' HBR supported in calibre, contact hbr.org and ask them'
-    #         ' to provide a javascript free login method.')

    LOGIN_URL = 'https://hbr.org/login?request_url=/'
    LOGOUT_URL = 'https://hbr.org/logout?request_url=/'

-    INDEX = 'http://hbr.org/archive-toc/BR'
+    INDEX = 'http://hbr.org'

    keep_only_tags = [dict(name='div', id='pageContainer')]
    remove_tags = [dict(id=['mastheadContainer', 'magazineHeadline',
        'articleToolbarTopRD', 'pageRightSubColumn', 'pageRightColumn',
        'todayOnHBRListWidget', 'mostWidget', 'keepUpWithHBR',
        'mailingListTout', 'partnerCenter', 'pageFooter',
-        'superNavHeadContainer', 'hbrDisqus',
+        'superNavHeadContainer', 'hbrDisqus', 'article-toolbox',
        'articleToolbarTop', 'articleToolbarBottom', 'articleToolbarRD']),
        dict(name='iframe')]
    extra_css = '''
@ -57,22 +50,6 @@ class HBR(BasicNewsRecipe):
        if url.endswith('/ar/1'):
            return url[:-1]+'pr'

-    def hbr_get_toc(self):
-        # return self.index_to_soup(open('/t/toc.html').read())
-
-        today = date.today()
-        future = today + timedelta(days=30)
-        past = today - timedelta(days=30)
-        for x in [x.strftime('%y%m') for x in (future, today, past)]:
-            url = self.INDEX + x
-            soup = self.index_to_soup(url)
-            if (not soup.find(text='Issue Not Found') and not soup.find(
-                text="We're Sorry.  There was an error processing your request")
-                and 'Exception: java.io.FileNotFoundException' not in
-                unicode(soup)):
-                return soup
-        raise Exception('Could not find current issue')
-
    def hbr_parse_toc(self, soup):
        feeds = []
        current_section = None
@ -105,23 +82,19 @@ class HBR(BasicNewsRecipe):

                articles.append({'title':title, 'url':url, 'description':desc,
                    'date':''})
+
+        if current_section is not None and articles:
+            feeds.append((current_section, articles))
        return feeds

    def parse_index(self):
-        soup = self.hbr_get_toc()
-        # open('/t/hbr.html', 'wb').write(unicode(soup).encode('utf-8'))
+        soup0 = self.index_to_soup('http://hbr.org/magazine')
+        datencover = soup0.find('ul', attrs={'id':'magazineArchiveCarousel'}).findAll('li')[-1]
+        #find date & cover
+        self.cover_url=datencover.img['src']
+        dates=self.tag_to_string(datencover.img['alt'])
+        self.timefmt = u' [%s]'%dates
+        soup = self.index_to_soup(self.INDEX + soup0.find('div', attrs = {'class':'magazine_page'}).a['href'])
        feeds = self.hbr_parse_toc(soup)
        return feeds

-    def get_cover_url(self):
-        cover_url = None
-        index = 'http://hbr.org/current'
-        soup = self.index_to_soup(index)
-        link_item = soup.find('img', alt=re.compile("Current Issue"), src=True)
-
-        if link_item:
-           cover_url = 'http://hbr.org' + link_item['src']
-
-        return cover_url
-
-
--- a/recipes/hindu.recipe
+++ b/recipes/hindu.recipe
@ -2,7 +2,6 @@ from __future__ import with_statement
 __license__ = 'GPL 3'
 __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'

-import time
 from calibre.web.feeds.news import BasicNewsRecipe

 class TheHindu(BasicNewsRecipe):
@ -14,44 +13,42 @@ class TheHindu(BasicNewsRecipe):
    max_articles_per_feed = 100
    no_stylesheets = True

-    keep_only_tags = [dict(id='content')]
-    remove_tags = [dict(attrs={'class':['article-links', 'breadcr']}),
-            dict(id=['email-section', 'right-column', 'printfooter', 'topover',
-                     'slidebox', 'th_footer'])]
+    auto_cleanup = True
+

    extra_css = '.photo-caption { font-size: smaller }'

-    def preprocess_raw_html(self, raw, url):
-        return raw.replace('<body><p>', '<p>').replace('</p></body>', '</p>')
-
-    def postprocess_html(self, soup, first_fetch):
-        for t in soup.findAll(['table', 'tr', 'td','center']):
-            t.name = 'div'
-        return soup
-
    def parse_index(self):
-        today = time.strftime('%Y-%m-%d')
-        soup = self.index_to_soup(
-                'http://www.thehindu.com/todays-paper/tp-index/?date=' + today)
-        div = soup.find(id='left-column')
-        feeds = []
+        soup = self.index_to_soup('http://www.thehindu.com/todays-paper/')
+        div = soup.find('div', attrs={'id':'left-column'})
+        soup.find(id='subnav-tpbar').extract()
+
+
+
        current_section = None
        current_articles = []
-        for x in div.findAll(['h3', 'div']):
-            if current_section and x.get('class', '') == 'tpaper':
-                a = x.find('a', href=True)
-                if a is not None:
-                    title = self.tag_to_string(a)
-                    self.log('\tFound article:', title)
-                    current_articles.append({'url':a['href']+'?css=print',
-                        'title':title, 'date': '',
-                        'description':''})
-            if x.name == 'h3':
-                if current_section and current_articles:
+        feeds = []
+        for x in div.findAll(['a', 'span']):
+            if x.name == 'span' and x['class'] == 's-link':
+                # Section heading found
+                if current_articles and current_section:
                    feeds.append((current_section, current_articles))
                current_section = self.tag_to_string(x)
-                self.log('Found section:', current_section)
                current_articles = []
+                self.log('\tFound section:', current_section)
+            elif x.name == 'a':
+
+                        title = self.tag_to_string(x)
+                        url = x.get('href', False)
+                        if not url or not title:
+                            continue
+                        self.log('\t\tFound article:', title)
+                        self.log('\t\t\t', url)
+                        current_articles.append({'title': title, 'url':url,
+                            'description':'', 'date':''})
+
+        if current_articles and current_section:
+             feeds.append((current_section, current_articles))
+
        return feeds

-
--- a/recipes/historia_pl.recipe
+++ b/recipes/historia_pl.recipe
@ -1,27 +1,22 @@
 from calibre.web.feeds.news import BasicNewsRecipe

 class Historia_org_pl(BasicNewsRecipe):
-    title          = u'Historia.org.pl'
-    __author__        = 'fenuks'
-    description   = u'Artykuły dotyczące historii w układzie epok i tematów, forum. Najlepsza strona historii. Matura z historii i egzamin gimnazjalny z historii.'
-    cover_url      = 'http://lh3.googleusercontent.com/_QeRQus12wGg/TOvHsZ2GN7I/AAAAAAAAD_o/LY1JZDnq7ro/logo5.jpg'
-    category       = 'history'
-    language       = 'pl'
+    title = u'Historia.org.pl'
+    __author__ = 'fenuks'
+    description = u'Artykuły dotyczące historii w układzie epok i tematów, forum. Najlepsza strona historii. Matura z historii i egzamin gimnazjalny z historii.'
+    cover_url = 'http://lh3.googleusercontent.com/_QeRQus12wGg/TOvHsZ2GN7I/AAAAAAAAD_o/LY1JZDnq7ro/logo5.jpg'
+    category = 'history'
+    language = 'pl'
    oldest_article = 8
+    extra_css = 'img {float: left; margin-right: 10px;} .alignleft {float: left; margin-right: 10px;}'
    remove_empty_feeds= True
    no_stylesheets = True
    use_embedded_content = True
    max_articles_per_feed = 100
    ignore_duplicate_articles = {'title', 'url'}
-
-
-    feeds          = [(u'Wszystkie', u'http://historia.org.pl/feed/'),
-		(u'Wiadomości', u'http://historia.org.pl/Kategoria/wiadomosci/feed/'),
-		(u'Publikacje', u'http://historia.org.pl/Kategoria/artykuly/feed/'),
-		(u'Publicystyka', u'http://historia.org.pl/Kategoria/publicystyka/feed/'),
-		(u'Recenzje', u'http://historia.org.pl/Kategoria/recenzje/feed/'),
-		(u'Projekty', u'http://historia.org.pl/Kategoria/projekty/feed/'),]
-
-
-    def print_version(self, url):
-        return url + '?tmpl=component&print=1&layout=default&page='
+    feeds = [(u'Wszystkie', u'http://historia.org.pl/feed/'),
+        (u'Wiadomości', u'http://historia.org.pl/Kategoria/wiadomosci/feed/'),
+        (u'Publikacje', u'http://historia.org.pl/Kategoria/artykuly/feed/'),
+        (u'Publicystyka', u'http://historia.org.pl/Kategoria/publicystyka/feed/'),
+        (u'Recenzje', u'http://historia.org.pl/Kategoria/recenzje/feed/'),
+        (u'Projekty', u'http://historia.org.pl/Kategoria/projekty/feed/'),]
--- a/recipes/history_today.recipe
+++ b/recipes/history_today.recipe
@ -1,6 +1,6 @@
-import re
-from calibre.web.feeds.recipes import BasicNewsRecipe
 from collections import OrderedDict
+import re
+from calibre.web.feeds.news import BasicNewsRecipe

 class HistoryToday(BasicNewsRecipe):

@ -19,7 +19,6 @@ class HistoryToday(BasicNewsRecipe):


    needs_subscription = True
-
    def get_browser(self):
        br = BasicNewsRecipe.get_browser(self)
        if self.username is not None and self.password is not None:
@ -46,8 +45,9 @@ class HistoryToday(BasicNewsRecipe):

        #Go to issue
        soup = self.index_to_soup('http://www.historytoday.com/contents')
-        cover = soup.find('div',attrs={'id':'content-area'}).find('img')['src']
+        cover = soup.find('div',attrs={'id':'content-area'}).find('img', attrs={'src':re.compile('.*cover.*')})['src']
        self.cover_url=cover
+        self.log(self.cover_url)

        #Go to the main body

@ -84,4 +84,3 @@ class HistoryToday(BasicNewsRecipe):

    def cleanup(self):
        self.browser.open('http://www.historytoday.com/logout')
-
--- a/recipes/icons/dzial_zagraniczny.png
+++ b/recipes/icons/dzial_zagraniczny.png
--- a/recipes/icons/equipped.png
+++ b/recipes/icons/equipped.png
--- a/recipes/icons/forbes_pl.png
+++ b/recipes/icons/forbes_pl.png
--- a/recipes/icons/gazeta-prawna-calibre-v1.png
+++ b/recipes/icons/gazeta-prawna-calibre-v1.png
--- a/recipes/icons/gazeta_pl_krakow.png
+++ b/recipes/icons/gazeta_pl_krakow.png
--- a/recipes/icons/gazeta_pl_szczecin.png
+++ b/recipes/icons/gazeta_pl_szczecin.png
--- a/recipes/icons/gazeta_pl_warszawa.png
+++ b/recipes/icons/gazeta_pl_warszawa.png
--- a/recipes/icons/gazeta_wyborcza.png
+++ b/recipes/icons/gazeta_wyborcza.png
--- a/recipes/icons/gofin_pl.png
+++ b/recipes/icons/gofin_pl.png
--- a/recipes/icons/histmag.png
+++ b/recipes/icons/histmag.png
--- a/recipes/icons/ittechblog.png
+++ b/recipes/icons/ittechblog.png
--- a/recipes/icons/kdefamily_pl.png
+++ b/recipes/icons/kdefamily_pl.png
--- a/recipes/icons/km_blog.png
+++ b/recipes/icons/km_blog.png
--- a/recipes/icons/magazyn_consido.png
+++ b/recipes/icons/magazyn_consido.png
--- a/recipes/icons/media2.png
+++ b/recipes/icons/media2.png
--- a/recipes/icons/mobilna.png
+++ b/recipes/icons/mobilna.png
--- a/recipes/icons/mojegotowanie.png
+++ b/recipes/icons/mojegotowanie.png
--- a/recipes/icons/najwyzszy_czas.png
+++ b/recipes/icons/najwyzszy_czas.png
--- a/recipes/icons/nowiny_rybnik.png
+++ b/recipes/icons/nowiny_rybnik.png
--- a/recipes/icons/optyczne_pl.png
+++ b/recipes/icons/optyczne_pl.png
--- a/recipes/icons/osw.png
+++ b/recipes/icons/osw.png
--- a/recipes/icons/ppe_pl.png
+++ b/recipes/icons/ppe_pl.png
--- a/recipes/icons/presseurop.png
+++ b/recipes/icons/presseurop.png
--- a/recipes/icons/res_publica.png
+++ b/recipes/icons/res_publica.png
--- a/recipes/icons/slashdot.png
+++ b/recipes/icons/slashdot.png
--- a/recipes/icons/sport_pl.png
+++ b/recipes/icons/sport_pl.png
--- a/recipes/icons/sportowefakty.png
+++ b/recipes/icons/sportowefakty.png
--- a/recipes/icons/wolne_media.png
+++ b/recipes/icons/wolne_media.png
--- a/recipes/icons/wysokie_obcasy.png
+++ b/recipes/icons/wysokie_obcasy.png
--- a/recipes/infra_pl.recipe
+++ b/recipes/infra_pl.recipe
@ -1,21 +1,20 @@
 from calibre.web.feeds.news import BasicNewsRecipe

 class INFRA(BasicNewsRecipe):
-    title          = u'INFRA'
+    title = u'INFRA'
    oldest_article = 7
    max_articles_per_feed = 100
-    __author__        = 'fenuks'
-    description   = u'Serwis Informacyjny INFRA - UFO, Zjawiska Paranormalne, Duchy, Tajemnice świata.'
-    cover_url      = 'http://npn.nazwa.pl/templates/ja_teline_ii/images/logo.jpg'
-    category       = 'UFO'
+    __author__ = 'fenuks'
+    description = u'Serwis Informacyjny INFRA - UFO, Zjawiska Paranormalne, Duchy, Tajemnice świata.'
+    cover_url = 'http://i.imgur.com/j7hJT.jpg'
+    category = 'UFO'
    index='http://infra.org.pl'
-    language       = 'pl'
+    language = 'pl'
    max_articles_per_feed = 100
-    no_stylesheers=True
-    remove_tags_before=dict(name='h2', attrs={'class':'contentheading'})
-    remove_tags_after=dict(attrs={'class':'pagenav'})
-    remove_tags=[dict(attrs={'class':'pagenav'})]
-    feeds          = [(u'Najnowsze wiadomo\u015bci', u'http://www.infra.org.pl/rss')]
+    remove_attrs = ['style']
+    no_stylesheets = True
+    keep_only_tags = [dict(id='ja-current-content')]
+    feeds = [(u'Najnowsze wiadomo\u015bci', u'http://www.infra.org.pl/rss')]

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
@ -23,4 +22,4 @@ class INFRA(BasicNewsRecipe):
        for a in soup('a'):
            if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
                a['href']=self.index + a['href']
-        return soup
+        return soup
--- a/recipes/interia_fakty.recipe
+++ b/recipes/interia_fakty.recipe
@ -1,23 +1,24 @@
 #!/usr/bin/env  python

 __license__   = 'GPL v3'
-__copyright__ = u'2010, Tomasz Dlugosz <tomek3d@gmail.com>'
+__copyright__ = u'2010-2013, Tomasz Dlugosz <tomek3d@gmail.com>'
 '''
 fakty.interia.pl
 '''
-
+import re
 from calibre.web.feeds.news import BasicNewsRecipe

 class InteriaFakty(BasicNewsRecipe):
    title          = u'Interia.pl - Fakty'
    description    = u'Fakty ze strony interia.pl'
    language = 'pl'
-    oldest_article = 7
+    oldest_article = 1
    __author__ = u'Tomasz D\u0142ugosz'
-    simultaneous_downloads = 2
    no_stylesheets = True
    remove_javascript = True
-    max_articles_per_feed = 100
+    remove_empty_feeds= True
+    use_embedded_content = False
+    ignore_duplicate_articles = {'title', 'url'}

    feeds          = [(u'Kraj', u'http://kanaly.rss.interia.pl/kraj.xml'),
                      (u'\u015awiat', u'http://kanaly.rss.interia.pl/swiat.xml'),
@ -26,14 +27,36 @@ class InteriaFakty(BasicNewsRecipe):
                      (u'Wywiady', u'http://kanaly.rss.interia.pl/wywiady.xml'),
                      (u'Ciekawostki', u'http://kanaly.rss.interia.pl/ciekawostki.xml')]

-    keep_only_tags = [dict(name='div', attrs={'id':'article'})]
+    keep_only_tags = [
+        dict(name='h1'),
+        dict(name='div', attrs={'class': ['lead textContent', 'text textContent', 'source']})]

-    remove_tags = [
-        dict(name='div', attrs={'class':'box fontSizeSwitch'}),
-        dict(name='div', attrs={'class':'clear'}),
-        dict(name='div', attrs={'class':'embed embedLeft articleEmbedArticleList articleEmbedArticleListTitle'}),
-        dict(name='span', attrs={'class':'keywords'})]
+    remove_tags = [dict(name='div', attrs={'class':['embed embedAd', 'REMOVE', 'boxHeader']})]
+
+    preprocess_regexps = [
+        (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
+        [
+            (r'embed embed(Left|Right|Center) articleEmbed(Audio|Wideo articleEmbedVideo|ArticleFull|ArticleTitle|ArticleListTitle|AlbumHorizontal)">', lambda match: 'REMOVE">'),
+            (r'</div>    <div class="source">', lambda match: ''),
+            (r'<p><a href="http://forum.interia.pl.*?</a></p>', lambda match: '')
+        ]
+    ]
+
+    def get_article_url(self, article):
+        '''
+        Return the decoded article URL for a feed entry.
+
+        Only entries whose link ends in "story01.htm" and does not contain
+        "galerie" are handled: the second-to-last path segment of such a
+        link holds the target URL with some characters escaped as "0X"
+        codes, which are decoded here. For any other entry the method
+        returns None.
+        '''
+        link = article.get('link', None)
+        if link and 'galerie' not in link and link.split('/')[-1]=="story01.htm":
+            link=link.split('/')[-2]
+            encoding = {'0B': '.', '0C': '/', '0A': '0', '0F': '=', '0G': '&',
+                '0D': '?', '0E': '-', '0H': ',', '0I': '_', '0N': '.com', '0L': 'http://'}
+            # Decode in one left-to-right pass. Chained str.replace() calls
+            # in dict order can turn the output of one substitution (e.g.
+            # '0A' -> '0' followed by a capital letter) into a new, bogus
+            # escape code. Unknown codes are left as they are.
+            return re.sub(r'0[A-Z]', lambda match: encoding.get(match.group(0), match.group(0)), link)
+
+    def print_version(self, url):
+        # Insert '/podglad-wydruku' in front of the first comma of the URL;
+        # when the URL has no comma the suffix (with its comma) is appended.
+        head, _, tail = url.partition(',')
+        return head + '/podglad-wydruku' + ',' + tail

    extra_css = '''
-            h2 { font-size: 1.2em; }
-        '''
+        h1 { font-size:130% }
+        div.info { font-style:italic; font-size:70%}
+    '''
--- a/recipes/interia_sport.recipe
+++ b/recipes/interia_sport.recipe
@ -1,7 +1,7 @@
 #!/usr/bin/env  python

 __license__   = 'GPL v3'
-__copyright__ = u'2010, Tomasz Dlugosz <tomek3d@gmail.com>'
+__copyright__ = u'2010-2013, Tomasz Dlugosz <tomek3d@gmail.com>'
 '''
 sport.interia.pl
 '''
@ -13,61 +13,51 @@ class InteriaSport(BasicNewsRecipe):
    title          = u'Interia.pl - Sport'
    description    = u'Sport ze strony interia.pl'
    language = 'pl'
-    oldest_article = 7
+    oldest_article = 1
    __author__ = u'Tomasz D\u0142ugosz'
-    simultaneous_downloads = 3
    no_stylesheets = True
    remove_javascript = True
-    max_articles_per_feed = 100
+    remove_empty_feeds= True
+    use_embedded_content = False
+    ignore_duplicate_articles = {'title', 'url'}

    feeds          = [(u'Wydarzenia sportowe', u'http://kanaly.rss.interia.pl/sport.xml'),
                      (u'Pi\u0142ka no\u017cna', u'http://kanaly.rss.interia.pl/pilka_nozna.xml'),
-                      (u'Siatk\xf3wka', u'http://kanaly.rss.interia.pl/siatkowka.xml'),
                      (u'Koszyk\xf3wka', u'http://kanaly.rss.interia.pl/koszykowka.xml'),
-                      (u'NBA', u'http://kanaly.rss.interia.pl/nba.xml'),
-                      (u'Kolarstwo', u'http://kanaly.rss.interia.pl/kolarstwo.xml'),
-                      (u'\u017bu\u017cel', u'http://kanaly.rss.interia.pl/zuzel.xml'),
                      (u'Tenis', u'http://kanaly.rss.interia.pl/tenis.xml')]

-    keep_only_tags = [dict(name='div', attrs={'id':'article'})]
+    keep_only_tags = [
+        dict(name='h1'),
+        dict(name='div', attrs={'class': ['lead textContent', 'text textContent', 'source']})]

-    remove_tags = [dict(name='div', attrs={'class':'object gallery'}),
-                   dict(name='div', attrs={'class':'box fontSizeSwitch'})]
-
-    extra_css = '''
-        .articleDate {
-        font-size: 0.5em;
-        color: black;
-        }
-
-        .articleFoto {
-        display: block;
-        font-family: sans;
-        font-size: 0.5em;
-        text-indent: 0
-        color: black;
-        }
-
-        .articleText {
-        display: block;
-        margin-bottom: 1em;
-        margin-left: 0;
-        margin-right: 0;
-        margin-top: 1em
-        color: black;
-        }
-
-        .articleLead {
-        font-size: 1.2em;
-        }
-        '''
+    remove_tags = [dict(name='div', attrs={'class':['embed embedAd', 'REMOVE', 'boxHeader']})]

    preprocess_regexps = [
        (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
        [
            (r'<p><a href.*?</a></p>', lambda match: ''),
-           # FIXME
-           #(r'(<div id="newsAddContent">)(.*?)(<a href=".*">)(.*?)(</a>)', lambda match: '\1\2\4'),
-            (r'<p>(<i>)?<b>(ZOBACZ|CZYTAJ) T.*?</div>', lambda match: '</div>')
+            (r'<p>(<i>)?<b>(ZOBACZ|CZYTAJ) T.*?</div>', lambda match: '</div>'),
+            (r'embed embed(Left|Right|Center) articleEmbed(Audio|Wideo articleEmbedVideo|ArticleFull|ArticleTitle|ArticleListTitle|AlbumHorizontal)">', lambda match: 'REMOVE">'),
+            (r'</div>    <div class="source">', lambda match: ''),
+            (r'<p><a href="http://forum.interia.pl.*?</a></p>', lambda match: '')
        ]
    ]
+
+    def get_article_url(self, article):
+        '''
+        Return the decoded article URL for a feed entry.
+
+        Only entries whose link ends in "story01.htm" and does not contain
+        "galerie" are handled: the second-to-last path segment of such a
+        link holds the target URL with some characters escaped as "0X"
+        codes, which are decoded here. For any other entry the method
+        returns None.
+        '''
+        link = article.get('link', None)
+        if link and 'galerie' not in link and link.split('/')[-1]=="story01.htm":
+            link=link.split('/')[-2]
+            encoding = {'0B': '.', '0C': '/', '0A': '0', '0F': '=', '0G': '&',
+                '0D': '?', '0E': '-', '0H': ',', '0I': '_', '0N': '.com', '0L': 'http://'}
+            # Decode in one left-to-right pass. Chained str.replace() calls
+            # in dict order can turn the output of one substitution (e.g.
+            # '0A' -> '0' followed by a capital letter) into a new, bogus
+            # escape code. Unknown codes are left as they are.
+            return re.sub(r'0[A-Z]', lambda match: encoding.get(match.group(0), match.group(0)), link)
+
+    def print_version(self, url):
+        # Insert '/podglad-wydruku' in front of the first comma of the URL;
+        # when the URL has no comma the suffix (with its comma) is appended.
+        head, _, tail = url.partition(',')
+        return head + '/podglad-wydruku' + ',' + tail
+
+    extra_css = '''
+        h1 { font-size:130% }
+        div.info { font-style:italic; font-size:70%}
+    '''
--- a/recipes/irish_times.recipe
+++ b/recipes/irish_times.recipe
@ -1,65 +1,62 @@
 __license__  = 'GPL v3'
-__copyright__ = "2008, Derry FitzGerald. 2009 Modified by Ray Kinsella and David O'Callaghan, 2011 Modified by Phil Burns"
+__copyright__ = "2008, Derry FitzGerald. 2009 Modified by Ray Kinsella and David O'Callaghan, 2011 Modified by Phil Burns, 2013 Tom Scholl"
 '''
 irishtimes.com
 '''
-import re
+import urlparse, re

 from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ptempfile import PersistentTemporaryFile
+

 class IrishTimes(BasicNewsRecipe):
    title          = u'The Irish Times'
-    encoding  = 'ISO-8859-15'
-    __author__    = "Derry FitzGerald, Ray Kinsella, David O'Callaghan and Phil Burns"
+    __author__    = "Derry FitzGerald, Ray Kinsella, David O'Callaghan and Phil Burns, Tom Scholl"
    language = 'en_IE'
-    timefmt = ' (%A, %B %d, %Y)'

+    masthead_url = 'http://www.irishtimes.com/assets/images/generic/website/logo_theirishtimes.png'

+    encoding = 'utf-8'
    oldest_article = 1.0
-    max_articles_per_feed  = 100
+    max_articles_per_feed = 100
+    remove_empty_feeds = True
    no_stylesheets = True
-    simultaneous_downloads= 5
-
-    r = re.compile('.*(?P<url>http:\/\/(www.irishtimes.com)|(rss.feedsportal.com\/c)\/.*\.html?).*')
-    remove_tags    = [dict(name='div', attrs={'class':'footer'})]
-    extra_css      = 'p, div { margin: 0pt; border: 0pt; text-indent: 0.5em } .headline {font-size: large;} \n .fact { padding-top: 10pt  }'
+    temp_files = []
+    articles_are_obfuscated = True

    feeds          = [
-                      ('Frontpage', 'http://www.irishtimes.com/feeds/rss/newspaper/index.rss'),
-                      ('Ireland', 'http://www.irishtimes.com/feeds/rss/newspaper/ireland.rss'),
-                      ('World', 'http://www.irishtimes.com/feeds/rss/newspaper/world.rss'),
-                      ('Finance', 'http://www.irishtimes.com/feeds/rss/newspaper/finance.rss'),
-                      ('Features', 'http://www.irishtimes.com/feeds/rss/newspaper/features.rss'),
-                      ('Sport', 'http://www.irishtimes.com/feeds/rss/newspaper/sport.rss'),
-                      ('Opinion', 'http://www.irishtimes.com/feeds/rss/newspaper/opinion.rss'),
-                      ('Letters', 'http://www.irishtimes.com/feeds/rss/newspaper/letters.rss'),
-                      ('Magazine', 'http://www.irishtimes.com/feeds/rss/newspaper/magazine.rss'),
-                      ('Health', 'http://www.irishtimes.com/feeds/rss/newspaper/health.rss'),
-                      ('Education & Parenting', 'http://www.irishtimes.com/feeds/rss/newspaper/education.rss'),
-                      ('Motors', 'http://www.irishtimes.com/feeds/rss/newspaper/motors.rss'),
-                      ('An Teanga Bheo', 'http://www.irishtimes.com/feeds/rss/newspaper/anteangabheo.rss'),
-                      ('Commercial Property', 'http://www.irishtimes.com/feeds/rss/newspaper/commercialproperty.rss'),
-                      ('Science Today', 'http://www.irishtimes.com/feeds/rss/newspaper/sciencetoday.rss'),
-                      ('Property', 'http://www.irishtimes.com/feeds/rss/newspaper/property.rss'),
-                      ('The Tickets', 'http://www.irishtimes.com/feeds/rss/newspaper/theticket.rss'),
-                      ('Weekend', 'http://www.irishtimes.com/feeds/rss/newspaper/weekend.rss'),
-                      ('News features', 'http://www.irishtimes.com/feeds/rss/newspaper/newsfeatures.rss'),
-                      ('Obituaries', 'http://www.irishtimes.com/feeds/rss/newspaper/obituaries.rss'),
+                      ('News', 'http://www.irishtimes.com/cmlink/the-irish-times-news-1.1319192'),
+                      ('World', 'http://www.irishtimes.com/cmlink/irishtimesworldfeed-1.1321046'),
+                      ('Politics', 'http://www.irishtimes.com/cmlink/irish-times-politics-rss-1.1315953'),
+                      ('Business', 'http://www.irishtimes.com/cmlink/the-irish-times-business-1.1319195'),
+                      ('Culture', 'http://www.irishtimes.com/cmlink/the-irish-times-culture-1.1319213'),
+                      ('Sport', 'http://www.irishtimes.com/cmlink/the-irish-times-sport-1.1319194'),
+                      ('Debate', 'http://www.irishtimes.com/cmlink/debate-1.1319211'),
+                      ('Life & Style', 'http://www.irishtimes.com/cmlink/the-irish-times-life-style-1.1319214'),
                    ]


-    def print_version(self, url):
-        if url.count('rss.feedsportal.com'):
-            #u = url.replace('0Bhtml/story01.htm','_pf0Bhtml/story01.htm')
-            u = url.find('irishtimes')
-            u = 'http://www.irishtimes.com' + url[u + 12:]
-            u = u.replace('0C', '/')
-            u = u.replace('A', '')
-            u = u.replace('0Bhtml/story01.htm', '_pf.html')
-        else:
-            u = url.replace('.html','_pf.html')
-        return u
+    def get_obfuscated_article(self, url):
+        '''
+        Build a local HTML file for the article at `url` and return its path.
+
+        The text comes from the print layout of the page. The first image of
+        the 'image-carousel' div of the regular page, when there is one, is
+        inserted at the start of the first paragraph. The temporary file is
+        also recorded in self.temp_files.
+        '''
+        # Insert a pic from the original url, but use content from the print url
+        pic = None
+        pics = self.index_to_soup(url)
+        div = pics.find('div', {'class' : re.compile('image-carousel')})
+        if div:
+            pic = div.img
+            if pic:
+                try:
+                    pic['src'] = urlparse.urljoin(url, pic['src'])
+                    pic.extract()
+                except Exception:
+                    # Best effort: an image without a usable src is dropped,
+                    # but KeyboardInterrupt/SystemExit are no longer swallowed
+                    pic = None
+
+        content = self.index_to_soup(url + '?mode=print&ot=example.AjaxPageLayout.ot')
+        # A print page without any <p> used to raise AttributeError here
+        if pic and content.p is not None:
+            content.p.insert(0, pic)
+
+        tmp = PersistentTemporaryFile('_fa.html')
+        self.temp_files.append(tmp)
+        try:
+            tmp.write(content.prettify())
+        finally:
+            # Close the file even when serialising or writing fails
+            tmp.close()
+        return tmp.name

-    def get_article_url(self, article):
-        return article.link

--- a/recipes/ittechblog.recipe
+++ b/recipes/ittechblog.recipe
@ -0,0 +1,26 @@
+__license__ = 'GPL v3'
+__copyright__ = 'MrStefan'
+
+'''
+www.ittechblog.pl
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class ittechblog(BasicNewsRecipe):
+    # Recipe metadata
+    title = u'IT techblog'
+    __author__ = 'MrStefan <mrstefaan@gmail.com>'
+    language = 'pl'
+    description = u'Na naszym blogu technologicznym znajdziesz między innymi: testy sprzętu, najnowsze startupy, technologiczne nowinki, felietony tematyczne.'
+
+    # Download settings
+    oldest_article = 7
+    max_articles_per_feed = 100
+    remove_empty_feeds = True
+    use_embedded_content = False
+
+    # Clean-up and styling
+    remove_javascript = True
+    no_stylesheets = True
+    extra_css = '.cover > img {display:block;}'
+    keep_only_tags = [{'attrs': {'class': 'box'}}]
+    remove_tags = [
+        {'name': 'aside'},
+        {'attrs': {'class': ['tags', 'counter', 'twitter-share-button']}},
+    ]
+
+    feeds = [
+        (u'Artykuły', u'http://feeds.feedburner.com/ITTechBlog?format=xml'),
+    ]
--- a/recipes/kdefamily_pl.recipe
+++ b/recipes/kdefamily_pl.recipe
@ -1,14 +1,16 @@
+import re
 from calibre.web.feeds.news import BasicNewsRecipe

 class KDEFamilyPl(BasicNewsRecipe):
-    title          = u'KDEFamily.pl'
-    __author__        = 'fenuks'
-    description   = u'KDE w Polsce'
-    category       = 'open source, KDE'
-    language       = 'pl'
+    title = u'KDEFamily.pl'
+    __author__ = 'fenuks'
+    description = u'KDE w Polsce'
+    category = 'open source, KDE'
+    language = 'pl'
    cover_url = 'http://www.mykde.home.pl/kdefamily/wp-content/uploads/2012/07/logotype-e1341585198616.jpg'
    oldest_article = 7
    max_articles_per_feed = 100
+    preprocess_regexps = [(re.compile(r"Podobne wpisy.*", re.IGNORECASE|re.DOTALL), lambda m: '')]
    no_stylesheets = True
    use_embedded_content = True
-    feeds          = [(u'Wszystko', u'http://kdefamily.pl/feed/')]
+    feeds = [(u'Wszystko', u'http://kdefamily.pl/feed/')]
--- a/recipes/km_blog.recipe
+++ b/recipes/km_blog.recipe
@ -0,0 +1,36 @@
+
+__license__ = 'GPL v3'
+__author__ = 'teepel <teepel44@gmail.com>, Artur Stachecki <artur.stachecki@gmail.com>'
+
+'''
+korwin-mikke.pl/blog
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class km_blog(BasicNewsRecipe):
+    # Recipe metadata
+    title = u'Korwin-Mikke Blog'
+    __author__ = 'teepel <teepel44@gmail.com>'
+    language = 'pl'
+    description = 'Wiadomości z bloga korwin-mikke.pl/blog'
+    INDEX = 'http://korwin-mikke.pl/blog'
+
+    # Download and clean-up settings
+    oldest_article = 7
+    max_articles_per_feed = 100
+    remove_empty_feeds = True
+    remove_javascript = True
+    no_stylesheets = True
+
+    feeds = [(u'blog', u'http://korwin-mikke.pl/blog/rss')]
+
+    keep_only_tags = [
+        # Note from the original author: this entry should show the title
+        # of the article, but it does not work
+        {'name': 'div', 'attrs': {'class': 'posts view'}},
+        {'name': 'div', 'attrs': {'class': 'text'}},
+        {'name': 'h1'},
+    ]
+
+    remove_tags = [
+        {'name': 'p', 'attrs': {'class': 'float_right'}},
+        {'name': 'p', 'attrs': {'class': 'date'}},
+    ]
+
+    remove_tags_after = [{'name': 'div', 'attrs': {'class': 'text'}}]
--- a/recipes/konflikty_zbrojne.recipe
+++ b/recipes/konflikty_zbrojne.recipe
@ -3,10 +3,10 @@ from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.ebooks.BeautifulSoup import BeautifulSoup

 class Konflikty(BasicNewsRecipe):
-    title          = u'Konflikty Zbrojne'
-    __author__        = 'fenuks'
-    cover_url      = 'http://www.konflikty.pl/images/tapety_logo.jpg'
-    language       = 'pl'
+    title = u'Konflikty Zbrojne'
+    __author__ = 'fenuks'
+    cover_url = 'http://www.konflikty.pl/images/tapety_logo.jpg'
+    language = 'pl'
    description = u'Zbiór ciekawych artykułów historycznych, militarnych oraz recenzji książek, gier i filmów. Najświeższe informacje o lotnictwie, wojskach lądowych i polityce.'
    category='military, history'
    oldest_article = 7
@ -14,19 +14,20 @@ class Konflikty(BasicNewsRecipe):
    no_stylesheets = True
    keep_only_tags=[dict(attrs={'class':['title1', 'image']}), dict(id='body')]

-    feeds          = [(u'Aktualności', u'http://www.konflikty.pl/rss_aktualnosci_10.xml'),
-		(u'Historia', u'http://www.konflikty.pl/rss_historia_10.xml'),
-		(u'Militaria', u'http://www.konflikty.pl/rss_militaria_10.xml'),
-		(u'Relacje', u'http://www.konflikty.pl/rss_relacje_10.xml'),
-		(u'Recenzje', u'http://www.konflikty.pl/rss_recenzje_10.xml'),
-		(u'Teksty źródłowe', u'http://www.konflikty.pl/rss_tekstyzrodlowe_10.xml')]
+    feeds = [(u'Aktualności', u'http://www.konflikty.pl/rss_aktualnosci_10.xml'),
+        (u'Historia', u'http://www.konflikty.pl/rss_historia_10.xml'),
+        (u'Militaria', u'http://www.konflikty.pl/rss_militaria_10.xml'),
+        (u'Relacje', u'http://www.konflikty.pl/rss_relacje_10.xml'),
+        (u'Recenzje', u'http://www.konflikty.pl/rss_recenzje_10.xml'),
+        (u'Teksty źródłowe', u'http://www.konflikty.pl/rss_tekstyzrodlowe_10.xml')]

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        for image in soup.findAll(name='a', attrs={'class':'image'}):
+            image['style'] = 'width: 210px; float: left; margin-right:5px;'
            if image.img and image.img.has_key('alt'):
                image.name='div'
                pos = len(image.contents)
                image.insert(pos, BeautifulSoup('<p style="font-style:italic;">'+image.img['alt']+'</p>'))
-        return soup
+        return soup
--- a/recipes/kosmonauta_pl.recipe
+++ b/recipes/kosmonauta_pl.recipe
@ -2,12 +2,13 @@

 from calibre.web.feeds.news import BasicNewsRecipe
 class Kosmonauta(BasicNewsRecipe):
-    title          = u'Kosmonauta.net'
-    __author__        = 'fenuks'
-    description   = u'polskojęzyczny portal w całości dedykowany misjom kosmicznym i badaniom kosmosu.'
-    category       = 'astronomy'
-    language       = 'pl'
+    title = u'Kosmonauta.net'
+    __author__ = 'fenuks'
+    description = u'polskojęzyczny portal w całości dedykowany misjom kosmicznym i badaniom kosmosu.'
+    category = 'astronomy'
+    language = 'pl'
    cover_url = 'http://bi.gazeta.pl/im/4/10393/z10393414X,Kosmonauta-net.jpg'
+    extra_css = '.thumbnail {float:left;margin-right:5px;}'
    no_stylesheets = True
    INDEX = 'http://www.kosmonauta.net'
    oldest_article = 7
@ -16,9 +17,12 @@ class Kosmonauta(BasicNewsRecipe):
    remove_attributes = ['style']
    max_articles_per_feed = 100
    keep_only_tags = [dict(name='div', attrs={'class':'item-page'})]
-    remove_tags = [dict(attrs={'class':['article-tools clearfix', 'cedtag', 'nav clearfix', 'jwDisqusForm']})]
+    remove_tags = [dict(attrs={'class':['article-tools clearfix', 'cedtag', 'nav clearfix', 'jwDisqusForm']}), dict(attrs={'alt':['Poprzednia strona', 'Następna strona']})]
    remove_tags_after = dict(name='div', attrs={'class':'cedtag'})
-    feeds          = [(u'Kosmonauta.net', u'http://www.kosmonauta.net/?format=feed&type=atom')]
+    feeds = [(u'Kosmonauta.net', u'http://www.kosmonauta.net/?format=feed&type=atom')]
+    
+    def print_version(self, url):
+        # The print layout is the article URL plus a fixed query string
+        query = '?tmpl=component&print=1&layout=default&page='
+        return url + query

    def preprocess_html(self, soup):
        for a in soup.findAll(name='a'):
@ -26,5 +30,4 @@ class Kosmonauta(BasicNewsRecipe):
               href = a['href']
               if not href.startswith('http'):
                   a['href'] = self.INDEX + href
-        return soup
-            
+        return soup
--- a/recipes/kp.recipe
+++ b/recipes/kp.recipe
@ -2,8 +2,7 @@
 from calibre.web.feeds.news import BasicNewsRecipe

 class KrytykaPolitycznaRecipe(BasicNewsRecipe):
-    __license__ = 'GPL v3'
-    __author__ = u'intromatyk <intromatyk@gmail.com>'
+    __author__ = u'Artur Stachecki <artur.stachecki@gmail.com>'
    language = 'pl'
    version = 1

--- a/recipes/ledevoir.recipe
+++ b/recipes/ledevoir.recipe
@ -2,7 +2,7 @@ __license__   = 'GPL v3'
 __author__    = 'Lorenzo Vigentini and Olivier Daigle'
 __copyright__ = '2012, Lorenzo Vigentini <l.vigentini at gmail.com>, Olivier Daigle <odaigle _at nuvucameras __dot__ com>'
 __version__     = 'v1.01'
-__date__        = '22, December 2012'
+__date__        = '17, March 2013'
 __description__   = 'Canadian Paper '

 '''
@ -28,10 +28,14 @@ class ledevoir(BasicNewsRecipe):

    oldest_article = 1
    max_articles_per_feed = 200
+    min_articles_per_feed = 0
    use_embedded_content  = False
    recursion             = 10
    needs_subscription    = 'optional'

+    compress_news_images = True
+    compress_news_images_auto_size = 4
+
    filterDuplicates = False
    url_list = []

@ -66,16 +70,16 @@ class ledevoir(BasicNewsRecipe):

    feeds          = [
                       (u'A la une', 'http://www.ledevoir.com/rss/manchettes.xml'),
-#                       (u'Édition complete', 'http://feeds2.feedburner.com/fluxdudevoir'),
-#                       (u'Opinions', 'http://www.ledevoir.com/rss/opinions.xml'),
-#                       (u'Chroniques', 'http://www.ledevoir.com/rss/chroniques.xml'),
-#                       (u'Politique', 'http://www.ledevoir.com/rss/section/politique.xml?id=51'),
-#                       (u'International', 'http://www.ledevoir.com/rss/section/international.xml?id=76'),
-#                       (u'Culture', 'http://www.ledevoir.com/rss/section/culture.xml?id=48'),
-#                       (u'Environnement', 'http://www.ledevoir.com/rss/section/environnement.xml?id=78'),
-#                       (u'Societe', 'http://www.ledevoir.com/rss/section/societe.xml?id=52'),
-#                       (u'Economie', 'http://www.ledevoir.com/rss/section/economie.xml?id=49'),
-#                       (u'Sports', 'http://www.ledevoir.com/rss/section/sports.xml?id=85'),
+                       (u'Édition complete', 'http://feeds2.feedburner.com/fluxdudevoir'),
+                       (u'Opinions', 'http://www.ledevoir.com/rss/opinions.xml'),
+                       (u'Chroniques', 'http://www.ledevoir.com/rss/chroniques.xml'),
+                       (u'Politique', 'http://www.ledevoir.com/rss/section/politique.xml?id=51'),
+                       (u'International', 'http://www.ledevoir.com/rss/section/international.xml?id=76'),
+                       (u'Culture', 'http://www.ledevoir.com/rss/section/culture.xml?id=48'),
+                       (u'Environnement', 'http://www.ledevoir.com/rss/section/environnement.xml?id=78'),
+                       (u'Societe', 'http://www.ledevoir.com/rss/section/societe.xml?id=52'),
+                       (u'Economie', 'http://www.ledevoir.com/rss/section/economie.xml?id=49'),
+                       (u'Sports', 'http://www.ledevoir.com/rss/section/sports.xml?id=85'),
                       (u'Art de vivre', 'http://www.ledevoir.com/rss/section/art-de-vivre.xml?id=50')
                     ]

@ -113,3 +117,23 @@ class ledevoir(BasicNewsRecipe):
        self.url_list.append(url)
        return url

+'''
+    def postprocess_html(self, soup, first):
+        #process all the images. assumes that the new html has the correct path
+        if first == 0:
+          return soup
+
+        for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
+             iurl = tag['src']
+             img = Image()
+             img.open(iurl)
+        #     width, height = img.size
+        #     print 'img is: ', iurl, 'width is: ', width, 'height is: ', height
+             if img < 0:
+                raise RuntimeError('Out of memory')
+             img.set_compression_quality(30)
+             img.save(iurl)
+        return soup
+'''
+
+
--- a/recipes/list_apart.recipe
+++ b/recipes/list_apart.recipe
@ -1,33 +1,23 @@
-# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+# vim:fileencoding=UTF-8
+from __future__ import unicode_literals
 from calibre.web.feeds.news import BasicNewsRecipe

 class AListApart (BasicNewsRecipe):
-   __author__ = u'Marc Busqué <marc@lamarciana.com>'
+   __author__ = 'Marc Busqué <marc@lamarciana.com>'
   __url__ = 'http://www.lamarciana.com'
-   __version__ = '1.0'
+   __version__ = '2.0'
   __license__   = 'GPL v3'
-   __copyright__ = u'2012, Marc Busqué <marc@lamarciana.com>'
+   __copyright__ = '2012, Marc Busqué <marc@lamarciana.com>'
   title = u'A List Apart'
-   description = u'A List Apart Magazine (ISSN: 1534-0295) explores the design, development, and meaning of web content, with a special focus on web standards and best practices.'
+   description = u'A List Apart Magazine (ISSN: 1534-0295) explores the design, development, and meaning of web content, with a special focus on web standards and best practices. This recipe retrieve articles and columns.'
   language = 'en'
   tags = 'web development, software'
   oldest_article = 120
   remove_empty_feeds = True
-   no_stylesheets = True
   encoding = 'utf8'
   cover_url = u'http://alistapart.com/pix/alalogo.gif'
-   keep_only_tags = [
-         dict(name='div', attrs={'id': 'content'})
-         ]
-   remove_tags = [
-         dict(name='ul', attrs={'id': 'metastuff'}),
-         dict(name='div', attrs={'class': 'discuss'}),
-         dict(name='div', attrs={'class': 'discuss'}),
-         dict(name='div', attrs={'id': 'learnmore'}),
-         ]
-   remove_attributes = ['border', 'cellspacing', 'align', 'cellpadding', 'colspan', 'valign', 'vspace', 'hspace', 'alt', 'width', 'height']
-   extra_css = u'img {max-width: 100%; display: block; margin: auto;} #authorbio img {float: left; margin-right: 2%;}'
+   extra_css = u'img {max-width: 100%; display: block; margin: auto;}'

   feeds = [
-         (u'A List Apart', u'http://www.alistapart.com/site/rss'),
+         (u'A List Apart', u'http://feeds.feedburner.com/alistapart/abridged'),
         ]
--- a/recipes/magazyn_consido.recipe
+++ b/recipes/magazyn_consido.recipe
@ -0,0 +1,88 @@
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+
+'''
+magazynconsido.pl/
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.utils.magick import Image
+
+class magazynconsido(BasicNewsRecipe):
+    # Recipe for magazynconsido.pl: articles are grouped by feed category,
+    # the next page of a paginated post is appended to the first one and
+    # every image is converted to grayscale.
+    title = u'Magazyn Consido'
+    __author__ = 'Artur Stachecki <artur.stachecki@gmail.com> ,teepel <teepel44@gmail.com>'
+    language = 'pl'
+    description =u'Portal dla architektów i projektantów'
+    masthead_url='http://qualitypixels.pl/wp-content/themes/airlock/advance/inc/timthumb.php?src=http://qualitypixels.pl/wp-content/uploads/2012/01/logotyp-magazynconsido-11.png&w=455&zc=1'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    remove_javascript=True
+    no_stylesheets = True
+    use_embedded_content = False
+
+    keep_only_tags = [
+        dict(name = 'h1'),
+        dict(name = 'p'),
+        dict(attrs = {'class' : 'navigation'}),
+    ]
+    remove_tags =[dict(attrs = {'style' : 'font-size: x-small;' })]
+
+    remove_tags_after =[dict(attrs = {'class' : 'navigation' })]
+
+    extra_css='''       img {max-width:30%; max-height:30%; display: block; margin-left: auto; margin-right: auto;}
+                        h1 {text-align: center;}'''
+
+    def parse_index(self): #(kk)
+        '''
+        Build the index from the FeedBurner feed.
+
+        Returns a list of (section, articles) tuples, one per feed category
+        in order of first appearance; the 'Video' category is left out.
+        '''
+        soup = self.index_to_soup('http://feeds.feedburner.com/magazynconsido?format=xml')
+        articles = {}
+        sections = []
+
+        for item in soup.findAll('item'):
+            section = self.tag_to_string(item.category)
+            if section not in articles:
+                sections.append(section)
+                articles[section] = []
+            articles[section].append({
+                'title' : self.tag_to_string(item.title),
+                'url' : self.tag_to_string(item.guid),
+                'date' : self.tag_to_string(item.pubDate),
+                'description' : self.tag_to_string(item.description),
+            })
+
+        # The original appended the 'Video' section and popped it straight
+        # away; skipping it has the same result
+        return [(section, articles[section]) for section in sections
+                if section != 'Video']
+
+    def append_page(self, soup, appendtag):
+        '''
+        Append the paragraphs of the next page (if any) to `appendtag` and
+        strip the unwanted <div> elements from it.
+        '''
+        apage = soup.find('div', attrs={'class':'wp-pagenavi'})
+        if apage is not None:
+            nexturl = soup.find('a', attrs={'class':'nextpostslink'})
+            # On the last page the pager has no "next" link; indexing None
+            # raised TypeError here
+            if nexturl is not None and nexturl.has_key('href'):
+                soup2 = self.index_to_soup(nexturl['href'])
+                for tag in soup2.findAll('p'):
+                    appendtag.insert(len(appendtag.contents), tag)
+
+        junk = {'class': ['height: 35px;', 'post-meta', 'addthis_toolbox addthis_default_style addthis_', 'post-meta-bottom', 'block_recently_post', 'fbcomments', 'pin-it-button', 'pages', 'navigation']}
+        div = appendtag.find('div', attrs=junk)
+        while div is not None:
+            div.replaceWith('')
+            div = appendtag.find('div', attrs=junk)
+
+    def preprocess_html(self, soup): #(kk)
+        # Merge the follow-up page first, then let calibre fix up the images
+        self.append_page(soup, soup.body)
+        return self.adeify_images(soup)
+
+    def postprocess_html(self, soup, first):
+        # Convert every image that has a src to grayscale, saving it back
+        # to the path it was opened from
+        for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
+            iurl = tag['src']
+            img = Image()
+            img.open(iurl)
+            if img < 0:
+                raise RuntimeError('Out of memory')
+            img.type = "GrayscaleType"
+            img.save(iurl)
+        return soup
--- a/recipes/media2.recipe
+++ b/recipes/media2.recipe
@ -0,0 +1,35 @@
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = 'teepel'
+
+'''
+media2.pl
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class media2_pl(BasicNewsRecipe):
+    # Recipe metadata
+    title = u'Media2'
+    __author__ = 'teepel <teepel44@gmail.com>'
+    language = 'pl'
+    description = u'Media2.pl to jeden z najczęściej odwiedzanych serwisów dla profesjonalistów z branży medialnej, telekomunikacyjnej, public relations oraz nowych technologii.'
+    masthead_url = 'http://media2.pl/res/logo/www.png'
+
+    # Download and clean-up settings
+    oldest_article = 1
+    max_articles_per_feed = 100
+    simultaneous_downloads = 5
+    remove_empty_feeds = True
+    remove_javascript = True
+    no_stylesheets = True
+
+    extra_css = '''.news-lead{font-weight: bold; }'''
+
+    keep_only_tags = [
+        {'name': 'div', 'attrs': {'class': 'news-item tpl-big'}},
+    ]
+
+    remove_tags = [
+        {'name': 'span', 'attrs': {'class': 'news-comments'}},
+        {'name': 'div', 'attrs': {'class': 'item-sidebar'}},
+        {'name': 'div', 'attrs': {'class': 'news-tags'}},
+    ]
+
+    feeds = [(u'Media2', u'http://feeds.feedburner.com/media2')]
--- a/recipes/metro_uk.recipe
+++ b/recipes/metro_uk.recipe
@ -6,10 +6,10 @@ import time

 class AdvancedUserRecipe1306097511(BasicNewsRecipe):
    title          = u'Metro UK'
-    description = 'News as provided by The Metro -UK'
+    description = 'News from The Metro, UK'
    #timefmt = ''
-    __author__ = 'fleclerc & Dave Asbury'
-    #last update 20/1/13
+    __author__ = 'Dave Asbury'
+    #last update 4/4/13
    #cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/276636_117118184990145_2132092232_n.jpg'

    cover_url = 'https://twimg0-a.akamaihd.net/profile_images/1638332595/METRO_LETTERS-01.jpg'
@ -22,7 +22,7 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):

    language = 'en_GB'
    masthead_url        = 'http://e-edition.metro.co.uk/images/metro_logo.gif'
-
+    compress_news_images = True
    def parse_index(self):
 		articles = {}
 		key = None
--- a/recipes/mlody_technik_pl.recipe
+++ b/recipes/mlody_technik_pl.recipe
@ -2,13 +2,14 @@
 import re
 from calibre.web.feeds.news import BasicNewsRecipe
 class Mlody_technik(BasicNewsRecipe):
-    title          = u'Młody technik'
-    __author__        = 'fenuks'
-    description   = u'Młody technik'
-    category       = 'science'
-    language       = 'pl'
+    title = u'Młody technik'
+    __author__ = 'fenuks'
+    description = u'Młody technik'
+    category = 'science'
+    language = 'pl'
    #cover_url = 'http://science-everywhere.pl/wp-content/uploads/2011/10/mt12.jpg'
    no_stylesheets = True
+    extra_css = 'img.alignleft {float: left; margin-right: 5px;}'
    preprocess_regexps = [(re.compile(r"<h4>Podobne</h4>", re.IGNORECASE), lambda m: '')]
    oldest_article = 7
    max_articles_per_feed = 100
@ -17,18 +18,18 @@ class Mlody_technik(BasicNewsRecipe):
    keep_only_tags = [dict(id='content')]
    remove_tags = [dict(attrs={'class':'st-related-posts'})]
    remove_tags_after = dict(attrs={'class':'entry-content clearfix'})
-    feeds          = [(u'Wszystko', u'http://www.mt.com.pl/feed'), 
-		#(u'MT NEWS 24/7', u'http://www.mt.com.pl/kategoria/mt-newsy-24-7/feed'),
-		(u'Info zoom', u'http://www.mt.com.pl/kategoria/info-zoom/feed'),
-		(u'm.technik', u'http://www.mt.com.pl/kategoria/m-technik/feed'),
-		(u'Szkoła', u'http://www.mt.com.pl/kategoria/szkola-2/feed'),
-		(u'Na Warsztacie', u'http://www.mt.com.pl/kategoria/na-warsztacie/feed'),
-		(u'Z pasji do...', u'http://www.mt.com.pl/kategoria/z-pasji-do/feed'),
-		(u'MT testuje', u'http://www.mt.com.pl/kategoria/mt-testuje/feed')]
+    feeds = [(u'Wszystko', u'http://www.mt.com.pl/feed'),
+        #(u'MT NEWS 24/7', u'http://www.mt.com.pl/kategoria/mt-newsy-24-7/feed'),
+        (u'Info zoom', u'http://www.mt.com.pl/kategoria/info-zoom/feed'),
+        (u'm.technik', u'http://www.mt.com.pl/kategoria/m-technik/feed'),
+        (u'Szkoła', u'http://www.mt.com.pl/kategoria/szkola-2/feed'),
+        (u'Na Warsztacie', u'http://www.mt.com.pl/kategoria/na-warsztacie/feed'),
+        (u'Z pasji do...', u'http://www.mt.com.pl/kategoria/z-pasji-do/feed'),
+        (u'MT testuje', u'http://www.mt.com.pl/kategoria/mt-testuje/feed')]

    def get_cover_url(self):
        soup = self.index_to_soup('http://www.mt.com.pl/')
        tag = soup.find(attrs={'class':'xoxo'})
        if tag:
            self.cover_url = tag.find('img')['src']
-        return getattr(self, 'cover_url', self.cover_url)
+        return getattr(self, 'cover_url', self.cover_url)
--- a/recipes/mobilna.recipe
+++ b/recipes/mobilna.recipe
@ -0,0 +1,26 @@
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = 'MrStefan'
+
+'''
+www.mobilna.pl
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class mobilna(BasicNewsRecipe):
+    # Recipe for mobilna.pl, driven by the single RSS feed listed at the
+    # bottom of the class.
+    title = u'Mobilna.pl'
+    __author__ = 'MrStefan <mrstefaan@gmail.com>'
+    language = 'pl'
+    description =u'twoja mobilna strona'
+    #masthead_url=''
+    remove_empty_feeds= True
+    oldest_article = 7
+    max_articles_per_feed = 100
+    remove_javascript=True
+    no_stylesheets=True
+    # Article text is taken from the feed entries themselves; presumably
+    # that is why the keep_only_tags filter below is left commented out.
+    use_embedded_content = True
+    #keep_only_tags =[dict(attrs={'class':'Post'})]
+
+    feeds          = [(u'Artykuły', u'http://mobilna.pl/feed/')]
--- a/recipes/mojegotowanie.recipe
+++ b/recipes/mojegotowanie.recipe
@ -0,0 +1,50 @@
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = 'MrStefan, teepel'
+
+'''
+www.mojegotowanie.pl
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class mojegotowanie(BasicNewsRecipe):
+    # Recipe for mojegotowanie.pl: only feed entries that link to the print
+    # layout are kept, and articles with 'film' in the title are dropped.
+    title = u'Moje Gotowanie'
+    __author__ = 'MrStefan <mrstefaan@gmail.com>, teepel <teepel44@gmail.com>'
+    language = 'pl'
+    description =u'Gotowanie to Twoja pasja? Uwielbiasz sałatki? Lubisz grillować? Przepisy kulinarne doskonałe na wszystkie okazje znajdziesz na www.mojegotowanie.pl.'
+    masthead_url='http://www.mojegotowanie.pl/extension/selfstart/design/self/images/top_c2.gif'
+    cover_url = 'http://www.mojegotowanie.pl/extension/selfstart/design/self/images/mgpl/mojegotowanie.gif'
+    remove_empty_feeds= True
+    oldest_article = 7
+    max_articles_per_feed = 100
+    remove_javascript=True
+    no_stylesheets=True
+
+    keep_only_tags = [dict(name = 'div', attrs = {'class' : 'content'})]
+
+    feeds = [(u'Artykuły', u'http://mojegotowanie.pl/rss/feed/artykuly'),
+                 (u'Przepisy', u'http://mojegotowanie.pl/rss/feed/przepisy')]
+
+    def parse_feeds(self):
+        '''Return the parsed feeds without articles whose title contains 'film'.'''
+        feeds = BasicNewsRecipe.parse_feeds(self)
+        for feed in feeds:
+            # Filter in place so the feed keeps the same list object
+            feed.articles[:] = [article for article in feed.articles
+                                if 'film' not in article.title]
+        return feeds
+
+    def get_article_url(self, article):
+        '''
+        Return the entry's link when it points at the print layout, else None.
+        '''
+        link = article.get('link')
+        # Entries without a link used to raise TypeError on the 'in' test
+        if link and 'Clayout0Cset0Cprint0' in link:
+            return link
+        return None
+
+    def print_version(self, url):
+        '''
+        Decode the second-to-last path segment of the obfuscated feed URL
+        into the address of the print layout on www.mojegotowanie.pl.
+        '''
+        encoded = url.split('/')[-2]
+        # Order matters: the long host/layout prefix must go before the
+        # generic '0C' -> '/' replacement
+        for code, plain in (('0L0Smojegotowanie0Bpl0Clayout0Cset0Cprint0C', '/'),
+                            ('0I', '_'),
+                            ('0C', '/')):
+            encoded = encoded.replace(code, plain)
+        return 'http://www.mojegotowanie.pl/layout/set/print' + encoded
--- a/recipes/najwyzszy_czas.recipe
+++ b/recipes/najwyzszy_czas.recipe
@ -0,0 +1,27 @@
+#!/usr/bin/env  python
+
+__license__ = 'GPL v3'
+__author__ = 'teepel <teepel44@gmail.com>'
+
+'''
+nczas.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class nczas(BasicNewsRecipe):
+    # Recipe for nczas.com, driven by the single RSS feed listed at the
+    # bottom of the class.
+    title          = u'Najwy\u017cszy Czas'
+    __author__ = 'teepel <teepel44@gmail.com>'
+    language       = 'pl'
+    description ='Wiadomości z nczas.com'
+    # Not referenced anywhere else in this class
+    INDEX='http://nczas.com'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    # Article text is taken from the feed entries themselves
+    use_embedded_content = True
+    remove_empty_feeds= True
+    simultaneous_downloads = 5
+    remove_javascript=True
+    # Inline style attributes are stripped from the content
+    remove_attributes = ['style']
+    no_stylesheets=True
+
+    feeds          = [(u'Najwyższy Czas', u'http://nczas.com/feed/')]
--- a/recipes/nauka_w_polsce.recipe
+++ b/recipes/nauka_w_polsce.recipe
@ -1,16 +1,18 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 import re
 class NaukawPolsce(BasicNewsRecipe):
-    title          = u'Nauka w Polsce'
-    __author__        = 'fenuks'
-    description   = u'Serwis Nauka w Polsce ma za zadanie popularyzację polskiej nauki. Można na nim znaleźć wiadomości takie jak:  osiągnięcia polskich naukowców, wydarzenia na polskich uczelniach, osiągnięcia studentów, konkursy dla badaczy, staże i stypendia naukowe, wydarzenia w polskiej nauce, kalendarium wydarzeń w nauce, materiały wideo o nauce.'
-    category       = 'science'
-    language       = 'pl'
+    title = u'Nauka w Polsce'
+    __author__ = 'fenuks'
+    description = u'Serwis Nauka w Polsce ma za zadanie popularyzację polskiej nauki. Można na nim znaleźć wiadomości takie jak: osiągnięcia polskich naukowców, wydarzenia na polskich uczelniach, osiągnięcia studentów, konkursy dla badaczy, staże i stypendia naukowe, wydarzenia w polskiej nauce, kalendarium wydarzeń w nauce, materiały wideo o nauce.'
+    category = 'science'
+    language = 'pl'
    cover_url = 'http://www.naukawpolsce.pap.pl/Themes/Pap/images/logo-pl.gif'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_empty_feeds = True
+    extra_css = '.miniaturka {float: left; margin-right: 5px; max-width: 350px;} .miniaturka-dol-strony {display: inline-block; margin: 0 15px; width: 120px;}'
+    ignore_duplicate_articles = {'title', 'url'}
    index = 'http://www.naukawpolsce.pl'
    keep_only_tags = [dict(name='div', attrs={'class':'margines wiadomosc'})]
    remove_tags = [dict(name='div', attrs={'class':'tagi'})]
@ -23,8 +25,8 @@ class NaukawPolsce(BasicNewsRecipe):
            url = self.index + i.h1.a['href']
            date = '' #i.span.string
            articles.append({'title' : title,
-                   'url'   : url,
-                   'date'  : date,
+                   'url' : url,
+                   'date' : date,
                   'description' : ''
                    })
        return articles
@ -44,4 +46,4 @@ class NaukawPolsce(BasicNewsRecipe):
    def preprocess_html(self, soup):
        for p in soup.findAll(name='p', text=re.compile('&nbsp;')):
            p.extract()
-        return soup
+        return soup
--- a/recipes/niebezpiecznik.recipe
+++ b/recipes/niebezpiecznik.recipe
@ -1,16 +1,19 @@
 from calibre.web.feeds.news import BasicNewsRecipe

 class Niebezpiecznik_pl(BasicNewsRecipe):
-    title          = u'Niebezpiecznik.pl'
-    __author__        = 'fenuks'
-    description   = u'Niebezpiecznik.pl – o bezpieczeństwie i nie...'
-    category       = 'hacking, IT'
-    language       = 'pl'
+    title = u'Niebezpiecznik.pl'
+    __author__ = 'fenuks'
+    description = u'Niebezpiecznik.pl – o bezpieczeństwie i nie...'
+    category = 'hacking, IT'
+    language = 'pl'
    oldest_article = 8
+    extra_css = '.entry {margin-top: 25px;}'
+    # The recipe option is named remove_attributes; 'remove_attrs' is not a
+    # BasicNewsRecipe setting, so it would be set but never consulted.
+    remove_attributes = ['style']
    max_articles_per_feed = 100
    no_stylesheets = True
+    remove_empty_feeds = True
    cover_url = u'http://userlogos.org/files/logos/Karmody/niebezpiecznik_01.png'
    remove_tags = [dict(name='div', attrs={'class':['sociable']}), dict(name='h4'), dict(attrs={'class':'similar-posts'})]
    keep_only_tags = [dict(name='div', attrs={'class':['title', 'entry']})]
-    feeds          = [(u'Wiadomości', u'http://feeds.feedburner.com/niebezpiecznik/'),
-      	      ('Blog', 'http://feeds.feedburner.com/niebezpiecznik/linkblog/')]
+    feeds = [(u'Wiadomości', u'http://feeds.feedburner.com/niebezpiecznik/'),
+              ('Blog', 'http://feeds.feedburner.com/niebezpiecznik/linkblog/')]
--- a/recipes/nme.recipe
+++ b/recipes/nme.recipe
@ -12,6 +12,7 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
    max_articles_per_feed = 20
    #auto_cleanup = True
    language = 'en_GB'
+    compress_news_images = True

    def get_cover_url(self):
        soup = self.index_to_soup('http://www.nme.com/component/subscribe')
@ -27,7 +28,7 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
            br.open_novisit(cov2)
            cover_url = str(cov2)
        except:
-                cover_url = 'http://tawanda3000.files.wordpress.com/2011/02/nme-logo.jpg'
+            cover_url = 'http://tawanda3000.files.wordpress.com/2011/02/nme-logo.jpg'
        return cover_url

    masthead_url   = 'http://tawanda3000.files.wordpress.com/2011/02/nme-logo.jpg'
--- a/recipes/nowiny_rybnik.recipe
+++ b/recipes/nowiny_rybnik.recipe
@ -0,0 +1,31 @@
+#!/usr/bin/env  python
+
+__license__ = 'GPL v3'
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+# Recipe for the Nowiny (Rybnik) weekly, fed from its all-articles RSS feed.
+class NowinyRybnik(BasicNewsRecipe):
+    __author__ = 'Artur Stachecki <artur.stachecki@gmail.com>'
+    title = u'Nowiny - Rybnik'
+    description = u'Tygodnik Regionalny NOWINY. Ogłoszenia drobne, wiadomości i wydarzenia z regionu Rybnika i okolic'
+    language = 'pl'
+    masthead_url = 'http://www.nowiny.rybnik.pl/logo/logo.jpg'
+
+    oldest_article = 7
+    max_articles_per_feed = 100
+    simultaneous_downloads = 5
+    no_stylesheets = True
+    remove_javascript = True
+
+    # Keep only the div with id 'drukuj'; drop the div with id 'footer'.
+    keep_only_tags = [dict(name='div', attrs={'id': 'drukuj'})]
+    remove_tags = [dict(name='div', attrs={'id': 'footer'})]
+
+    feeds = [(u'Wszystkie artykuły', u'http://www.nowiny.rybnik.pl/rss,artykuly,dzial,0,miasto,0,ile,25.xml')]
+
+    def preprocess_html(self, soup):
+        '''Replace each <a> tag that has a string with that string; return soup.'''
+        for anchor in soup.findAll('a'):
+            if anchor.string is None:
+                # Anchors without a single string child are left untouched.
+                continue
+            label = anchor.string
+            anchor.replaceWith(label)
+        return soup
--- a/recipes/optyczne_pl.recipe
+++ b/recipes/optyczne_pl.recipe
@ -0,0 +1,41 @@
+#!/usr/bin/env  python
+__license__ = 'GPL v3'
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+# Recipe for optyczne.pl, fed from the site's rss.xml feed.
+class OptyczneRecipe(BasicNewsRecipe):
+    __author__ = u'Artur Stachecki <artur.stachecki@gmail.com>'
+    language = 'pl'
+
+    title = u'optyczne.pl'
+    category = u'News'
+    description = u'Najlepsze testy obiektywów, testy aparatów cyfrowych i testy lornetek w sieci!'
+    cover_url=''
+    remove_empty_feeds= True
+    oldest_article = 7
+    max_articles_per_feed = 100000
+    recursions = 0
+
+    # no_stylesheets used to be assigned twice in this class; one assignment
+    # with the same value is kept.
+    no_stylesheets = True
+    remove_javascript = True
+
+    keep_only_tags =[]
+    keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'news'}))
+
+    remove_tags =[]
+    remove_tags.append(dict(name = 'div', attrs = {'class' : 'center'}))
+    remove_tags.append(dict(name = 'div', attrs = {'class' : 'news_foto'}))
+    remove_tags.append(dict(name = 'div', attrs = {'align' : 'right'}))
+
+    extra_css = '''
+                    body {font-family: Arial,Helvetica,sans-serif;}
+                    h1{text-align: left;}
+                    h2{font-size: medium; font-weight: bold;}
+                    p.lead {font-weight: bold; text-align: left;}
+                    .authordate {font-size: small; color: #696969;}
+                    .fot{font-size: x-small; color: #666666;}
+                    '''
+    feeds          = [
+                            ('Aktualnosci', 'http://www.optyczne.pl/rss.xml'),
+                           ]
--- a/Show More
+++ b/Show More