GwR and KG revisions pre 6.35

2025-12-11 07:35:14 -05:00 · 2010-01-22 11:50:40 -07:00 · 2010-01-22 11:50:40 -07:00 · 636cfbb769
commit 636cfbb769
parent 6a8b4f9a21 3a1d39422a
8 changed files with 595 additions and 477 deletions
--- a/Changelog.yaml
+++ b/Changelog.yaml
@ -119,6 +119,8 @@
    - title: stuff.co.nz
      author: Krittika Goyal

+    - title: Editor and Publisher
+      author: XanthanGum

  improved recipes:
    - Physics Today
@ -127,6 +129,7 @@
    - FTD
    - The National Post
    - Blic
+    - Ars Technica


 - version: 0.6.34
--- a/resources/recipes/ars_technica.recipe
+++ b/resources/recipes/ars_technica.recipe
@ -1,12 +1,12 @@
-#!/usr/bin/env  python

 __license__   = 'GPL v3'
-__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
 arstechnica.com
 '''

 from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag

 class ArsTechnica2(BasicNewsRecipe):
    title                 = u'Ars Technica'
@ -18,24 +18,24 @@ class ArsTechnica2(BasicNewsRecipe):
    oldest_article        = 2
    max_articles_per_feed = 100
    no_stylesheets        = True
-    encoding              = 'utf8'
-    remove_javascript     = True
+    encoding              = 'utf-8'
    use_embedded_content  = False
+    extra_css             = ' body {font-family: sans-serif} .byline{font-weight: bold; line-height: 1em; font-size: 0.625em; text-decoration: none} '

-    extra_css = '''
-                    .news-item-title{font-size: medium ;font-family:Arial,Helvetica,sans-serif; font-weight:bold;}
-                    .news-item-teaser{font-size: small ;font-family:Arial,Helvetica,sans-serif; font-weight:bold;}
-                    .news-item-byline{font-size:xx-small; font-family:Arial,Helvetica,sans-serif;font-weight:normal;}
-                    .news-item-text{font-size:x-small;font-family:Arial,Helvetica,sans-serif;}
-                    .news-item-figure-caption-text{font-size:xx-small; font-family:Arial,Helvetica,sans-serif;font-weight:bold;}
-                    .news-item-figure-caption-byline{font-size:xx-small; font-family:Arial,Helvetica,sans-serif;font-weight:normal;}
-                '''
+    conversion_options = {
+                             'comments'  : description
+                            ,'tags'      : category
+                            ,'language'  : language
+                            ,'publisher' : publisher
+                         }

-    keep_only_tags = [dict(name='div', attrs={'id':['news-item-info','news-item']})]
+
+
+    keep_only_tags = [dict(name='div', attrs={'id':['story','etc-story']})]

    remove_tags = [
                     dict(name=['object','link','embed'])
-                    ,dict(name='div', attrs={'class':'related-stories'})
+                    ,dict(name='div', attrs={'class':'read-more-link'})
                  ]


@ -52,14 +52,19 @@ class ArsTechnica2(BasicNewsRecipe):
            ]

    def append_page(self, soup, appendtag, position):
-        pager = soup.find('div',attrs={'id':'pager'})
+        pager = soup.find('div',attrs={'class':'pager'})
        if pager:
           for atag in pager.findAll('a',href=True):
               str = self.tag_to_string(atag)
               if str.startswith('Next'):
-                  soup2 = self.index_to_soup(atag['href'])
+                  nurl = 'http://arstechnica.com' + atag['href']
+                  rawc = self.index_to_soup(nurl,True)
+                  soup2 = BeautifulSoup(rawc, fromEncoding=self.encoding)
 
-                  texttag = soup2.find('div', attrs={'class':'news-item-text'})
+                  readmoretag = soup2.find('div', attrs={'class':'read-more-link'})
+                  if readmoretag:
+                     readmoretag.extract()
+                  texttag = soup2.find('div', attrs={'class':'body'})
                  for it in texttag.findAll(style=True):
                      del it['style']

@ -71,10 +76,12 @@ class ArsTechnica2(BasicNewsRecipe):


    def preprocess_html(self, soup):
-
-        ftag = soup.find('div', attrs={'class':'news-item-byline'})
+        ftag = soup.find('div', attrs={'class':'byline'})
        if ftag:
-           ftag.insert(4,'<br /><br />')
+           brtag = Tag(soup,'br')
+           brtag2 = Tag(soup,'br')
+           ftag.insert(4,brtag)
+           ftag.insert(5,brtag2)

        for item in soup.findAll(style=True):
           del item['style']
@ -83,5 +90,7 @@ class ArsTechnica2(BasicNewsRecipe):

        return soup

+    def get_article_url(self, article):
+        return article.get('feedburner_origlink',  None).rpartition('?')[0]


--- a/resources/recipes/editor_and_publisher.recipe
+++ b/resources/recipes/editor_and_publisher.recipe
@ -0,0 +1,34 @@
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+class EandP(BasicNewsRecipe):
+    title              = u'Editor and Publisher'
+    __author__         = u'Xanthan Gum'
+    description        = 'News about newspapers and journalism.'
+    language = 'en'
+    no_stylesheets     = True
+
+    oldest_article = 7
+    max_articles_per_feed = 100
+
+    # Font formatting code borrowed from kwetal
+
+    extra_css = '''
+                 body{font-family:verdana,arial,helvetica,geneva,sans-serif ;}
+                 h1{font-size: xx-large;}
+                 h2{font-size: large;}
+                '''
+
+    # Delete everything before the article
+
+    remove_tags_before = dict(name='font', attrs={'class':'titlebar_black'})
+
+    # Delete everything after the article
+
+    preprocess_regexps = [(re.compile(r'<!--endclickprintinclude-->.*</body>', re.DOTALL|re.IGNORECASE),
+                           lambda match: '</body>'),]
+
+    feeds = [(u'Breaking News', u'http://feeds.feedburner.com/EditorAndPublisher-BreakingNews'),
+             (u'Business News', u'http://feeds.feedburner.com/EditorAndPublisher-BusinessNews'),
+             (u'Newsroom', u'http://feeds.feedburner.com/EditorAndPublisher-Newsroom'),
+             (u'Technology News', u'http://feeds.feedburner.com/EditorAndPublisher-Technology'),
+             (u'Syndicates News', u'http://feeds.feedburner.com/EditorAndPublisher-Syndicates')]
--- a/src/calibre/gui2/catalog/catalog_epub_mobi.py
+++ b/src/calibre/gui2/catalog/catalog_epub_mobi.py
@ -34,23 +34,19 @@ class PluginWidget(QWidget,Ui_Form):
    def initialize(self, name):
        self.name = name
        # Restore options from last use here
-        print "gui2.catalog.catalog_epub_mobi:initialize(): Retrieving options"
        for opt in self.OPTION_FIELDS:
-            opt_value = gprefs[self.name + '_' + opt[0]]
-            print "Restoring %s: %s" % (self.name + '_' + opt[0], opt_value)
-            setattr(self,opt[0], unicode(opt_value))
+            opt_value = gprefs.get(self.name + '_' + opt[0], opt[1])
+            getattr(self, opt[0]).setText(opt_value)

    def options(self):
-
        # Save/return the current options
-        # getattr() returns text value of QLineEdit control
-        print "gui2.catalog.catalog_epub_mobi:options(): Saving options"
        opts_dict = {}
        for opt in self.OPTION_FIELDS:
-            opt_value = unicode(getattr(self,opt[0]))
-            print "writing %s to gprefs" % opt_value
+            opt_value = unicode(getattr(self, opt[0]).text())
            gprefs.set(self.name + '_' + opt[0], opt_value)
-            opts_dict[opt[0]] = opt_value.split(',')
+            if opt[0] == 'exclude_tags':
+                opt_value = opt_value.split(',')
+            opts_dict[opt[0]] = opt_value

        opts_dict['output_profile'] = [load_defaults('page_setup')['output_profile']]

--- a/src/calibre/gui2/catalog/catalog_epub_mobi.ui
+++ b/src/calibre/gui2/catalog/catalog_epub_mobi.ui
@ -13,6 +13,7 @@
  <property name="windowTitle">
   <string>Form</string>
  </property>
+<<<<<<< TREE
  <widget class="QLabel" name="label">
   <property name="geometry">
    <rect>
@ -158,6 +159,85 @@
    <set>Qt::AlignCenter</set>
   </property>
  </widget>
+=======
+  <layout class="QGridLayout" name="gridLayout">
+   <item row="0" column="0">
+    <widget class="QLabel" name="label">
+     <property name="text">
+      <string>Tags to exclude as genres (regex):</string>
+     </property>
+     <property name="textFormat">
+      <enum>Qt::LogText</enum>
+     </property>
+     <property name="wordWrap">
+      <bool>true</bool>
+     </property>
+    </widget>
+   </item>
+   <item row="1" column="0">
+    <widget class="QLabel" name="label_2">
+     <property name="text">
+      <string>'Don't include this book' tag:</string>
+     </property>
+    </widget>
+   </item>
+   <item row="1" column="1">
+    <widget class="QLineEdit" name="exclude_tags">
+     <property name="toolTip">
+      <string extracomment="Tooltip comment here"/>
+     </property>
+    </widget>
+   </item>
+   <item row="2" column="0">
+    <widget class="QLabel" name="label_3">
+     <property name="text">
+      <string>'Mark this book as read' tag:</string>
+     </property>
+    </widget>
+   </item>
+   <item row="2" column="1">
+    <widget class="QLineEdit" name="read_tag">
+     <property name="toolTip">
+      <string extracomment="Tooltip comment here"/>
+     </property>
+    </widget>
+   </item>
+   <item row="3" column="0">
+    <widget class="QLabel" name="label_4">
+     <property name="text">
+      <string>Additional note tag prefix:</string>
+     </property>
+    </widget>
+   </item>
+   <item row="3" column="1">
+    <widget class="QLineEdit" name="note_tag">
+     <property name="toolTip">
+      <string extracomment="Tooltip comment here"/>
+     </property>
+    </widget>
+   </item>
+   <item row="4" column="0">
+    <spacer name="verticalSpacer">
+     <property name="orientation">
+      <enum>Qt::Vertical</enum>
+     </property>
+     <property name="sizeHint" stdset="0">
+      <size>
+       <width>20</width>
+       <height>40</height>
+      </size>
+     </property>
+    </spacer>
+   </item>
+   <item row="0" column="1">
+    <widget class="QLineEdit" name="exclude_genre">
+     <property name="toolTip">
+      <string extracomment="Tooltip comment here"/>
+     </property>
+    </widget>
+   </item>
+  </layout>
+>>>>>>> MERGE-SOURCE
 </widget>
 <resources/>
 <connections/>
--- a/src/calibre/library/catalog.py
+++ b/src/calibre/library/catalog.py
@ -1,4 +1,4 @@
-import pickle, os, re, shutil
+import pickle, os, re, shutil, htmlentitydefs

 from xml.sax.saxutils import escape

@ -291,7 +291,6 @@ class EPUB_MOBI(CatalogPlugin):
            # intToTranslate is a three-digit number

            tensComponentString = ""
-            hundredsComponenetString = ""

            hundredsComponent = intToTranslate - (intToTranslate % 100)
            tensComponent = intToTranslate % 100
@ -1366,7 +1365,7 @@ class EPUB_MOBI(CatalogPlugin):
                        # Check to see if cover is newer than thumbnail
                        # os.path.getmtime() = modified time
                        # os.path.ctime() = creation time
-                        cover_timestamp = os.path.getmtime(cover)
+                        cover_timestamp = os.path.getmtime(title['cover'])
                        thumb_timestamp = os.path.getmtime(thumb_fp)
                        if thumb_timestamp < cover_timestamp:
                            # if verbose: print "updating thumbnail for %s" % title['title']
@ -1470,7 +1469,6 @@ class EPUB_MOBI(CatalogPlugin):
            spine = soup.find('spine')
            stc = 0
            guide = soup.find('guide')
-            gtc = 0

            itemTag = Tag(soup, "item")
            itemTag['id'] = "ncx"
@ -1796,7 +1794,6 @@ class EPUB_MOBI(CatalogPlugin):
                print self.updateProgressFullStep("generateNCXByAuthor()")

            soup = self.ncxSoup
-            output = "ByAlphaAuthor"
            HTML_file = "content/ByAlphaAuthor.html"
            body = soup.find("navPoint")
            btc = len(body.contents)
@ -2036,7 +2033,6 @@ class EPUB_MOBI(CatalogPlugin):
                    content = node
            # Special handling for '&amp;' in 'cover'
            if key == 'cover' and re.search('&amp;',content):
-                hit =  re.search('&amp;',content)
                content = re.sub('&amp;','&',content)

            if content:
--- a/src/calibre/translations/nb.po
+++ b/src/calibre/translations/nb.po
@ -7652,7 +7652,7 @@ msgstr ""
 "Tilgjengelige områder: alle, forfattersortering, forfattere, kommentarer, "
 "omslagsbilde, formater, id, isbn, publikasjonsdato, bedømmelse, "
 "serieinndeks, serier, størrelse, tidsmerker, tittel, uuid.\n"
-"Standard: %standard\n"
+"Standard: %default\n"
 "Gjelder: CSV, XML utdataformater"

 #: /home/kovid/work/calibre/src/calibre/library/catalog.py:34
@ -7665,7 +7665,7 @@ msgstr ""
 "Utdatafelter som kan sorteres.\n"
 "Tilgjengelige områder: forfattersortering, id, bedømmelse, størrelse, "
 "tidsmerking, tittel.\n"
-"Standard: %standard'\n"
+"Standard: '%default'\n"
 "Gjelder: CSV, XML utdataformater"

 #: /home/kovid/work/calibre/src/calibre/library/cli.py:121
--- a/src/calibre/translations/pl.po
+++ b/src/calibre/translations/pl.po
@ -604,7 +604,7 @@ msgstr "Wykrycie dysku %s niemożliwe. Spróbuj ponownie uruchomić komputer."

 #: /home/kovid/work/calibre/src/calibre/devices/usbms/device.py:429
 msgid "Unable to detect the %s mount point. Try rebooting."
-msgstr "Nie można wykryć % s punkt montowania. Spróbuj zrestartować system."
+msgstr "Nie można wykryć %s punkt montowania. Spróbuj zrestartować system."

 #: /home/kovid/work/calibre/src/calibre/devices/usbms/device.py:484
 msgid "Unable to detect the %s disk drive."
@ -718,7 +718,7 @@ msgid ""
 msgstr ""
 "Nie udało się proces komiks: \n"
 "\n"
-"% s"
+"%s"

 #: /home/kovid/work/calibre/src/calibre/ebooks/comic/input.py:278
 msgid ""