KG updates

2025-08-30 23:00:21 -04:00 · 2011-05-08 13:08:19 -06:00 · 2011-05-08 13:08:19 -06:00 · c172e07039
commit c172e07039
parent f219f0144d 81a5016041
5 changed files with 30 additions and 13 deletions
--- a/recipes/fronda.recipe
+++ b/recipes/fronda.recipe
@ -21,14 +21,19 @@ class Fronda(BasicNewsRecipe):

    feeds          = [(u'Infformacje', u'http://fronda.pl/news/feed')]

-    keep_only_tags = [dict(name='h1', attrs={'class':'big'}),
-                      dict(name='ul', attrs={'class':'about clear'}),
-                      dict(name='div', attrs={'class':'content'})]
+    keep_only_tags = [dict(name='h2', attrs={'class':'news_title'}),
+                      dict(name='div', attrs={'class':'naglowek_tresc'}),
+                      dict(name='div', attrs={'id':'czytaj'}) ]
+
+    remove_tags = [dict(name='a', attrs={'class':'print'})]
+
    preprocess_regexps = [
        (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in 
-            [ (r'<a href="#" class="print">Drukuj</a>', lambda match: ''),
-              (r'<p><a href="http://fronda.pl/sklepy">.*</a></p>', lambda match: ''),
+            [ (r'<p><a href="http://fronda.pl/sklepy">.*</a></p>', lambda match: ''),
              (r'<p><a href="http://fronda.pl/pasaz">.*</a></p>', lambda match: ''),
              (r'<h3><strong>W.* lektury.*</a></p></div>', lambda match: '</div>'),
-              (r'<h3>Zobacz t.*?</div>', lambda match: '</div>') ]
+              (r'<h3>Zobacz t.*?</div>', lambda match: '</div>'),
+              (r'<p[^>]*>&nbsp;</p>', lambda match: ''),
+              (r'<p><span style=".*?"><br /></span></p> ', lambda match: ''),
+              (r'<a style=\'float:right;margin-top:3px;\' href="http://www.facebook.com/share.php?.*?</a>', lambda match: '')]
    ]
--- a/recipes/icons/rzeczpospolita.png
+++ b/recipes/icons/rzeczpospolita.png
--- a/recipes/rzeczpospolita.recipe
+++ b/recipes/rzeczpospolita.recipe
@ -2,7 +2,7 @@ from calibre.web.feeds.news import BasicNewsRecipe

 class RzeczpospolitaRecipe(BasicNewsRecipe):
    __license__  = 'GPL v3'
-    __author__ = 'kwetal'
+    __author__ = u'kwetal and Tomasz Dlugosz'
    language = 'pl'
    version = 1

@ -38,6 +38,8 @@ class RzeczpospolitaRecipe(BasicNewsRecipe):
    remove_tags.append(dict(name = 'div', attrs = {'class' : 'clr'}))
    remove_tags.append(dict(name = 'div', attrs = {'id' : 'share_bottom'}))
    remove_tags.append(dict(name = 'div', attrs = {'id' : 'copyright_law'}))
+    remove_tags.append(dict(name = 'div', attrs = {'class' : 'more'}))
+    remove_tags.append(dict(name = 'div', attrs = {'class' : 'editorPicks'}))

    extra_css = '''
                    body {font-family: verdana, arial, helvetica, geneva, sans-serif ;}
@ -48,6 +50,13 @@ class RzeczpospolitaRecipe(BasicNewsRecipe):
                    .fot{font-size: x-small; color: #666666;}
                    '''

+    def skip_ad_pages(self, soup):
+        '''
+        Follow the first link of an advertisement page, if this is one.
+
+        A page is treated as an advertisement when the text of its <title>
+        contains 'advertisement' (case-insensitive).
+
+        :param soup: parsed HTML of the downloaded page
+        :return: the result of ``self.index_to_soup(href, raw=True)`` for the
+                 first <a> on an advertisement page; ``None`` when the page is
+                 not an advertisement or offers no link to follow
+        '''
+        title = soup.find('title')
+        # find() yields None when there is no <title>; .string may also be
+        # None (e.g. an empty <title>), so guard before calling .lower()
+        title_text = title.string if title is not None else None
+        if not title_text or 'advertisement' not in title_text.lower():
+            return None
+
+        link = soup.find('a')
+        href = link.get('href') if link is not None else None
+        if not href:
+            # advertisement page without a usable link: nothing to fetch
+            return None
+        return self.index_to_soup(href, raw=True)
+
    def print_version(self, url):
        start, sep, rest = url.rpartition('/')
        forget, sep, index = rest.rpartition(',')
--- a/src/calibre/ebooks/conversion/cli.py
+++ b/src/calibre/ebooks/conversion/cli.py
@ -68,7 +68,8 @@ def check_command_line_options(parser, args, log):
        raise SystemExit(1)

    output = args[2]
-    if output.startswith('.') and output != '.':
+    if output.startswith('.') and (output != '.' and not
+            output.startswith('..')):
        output = os.path.splitext(os.path.basename(input))[0]+output
    output = os.path.abspath(output)

--- a/src/calibre/utils/titlecase.py
+++ b/src/calibre/utils/titlecase.py
@ -10,6 +10,7 @@ License: http://www.opensource.org/licenses/mit-license.php
 import re

 from calibre.utils.icu import capitalize
+from calibre.utils.config import prefs

 __all__ = ['titlecase']
 __version__ = '0.5'
@ -67,11 +68,12 @@ def titlecase(text):
            line.append(icu_lower(word))
            continue

-        match = MAC_MC.match(word)
-        if match and not match.group(2)[:3] in ('hin', 'ht'):
-            line.append("%s%s" % (capitalize(match.group(1)),
-                                  capitalize(match.group(2))))
-            continue
+        if prefs['language'].lower().startswith('en'):
+            match = MAC_MC.match(word)
+            if match and not match.group(2)[:3] in ('hin', 'ht'):
+                line.append("%s%s" % (capitalize(match.group(1)),
+                                      capitalize(match.group(2))))
+                continue

        hyphenated = []
        for item in word.split('-'):