GwR and KG revisions pre 6.35

2025-12-14 09:05:16 -05:00 · 2010-01-22 11:50:40 -07:00 · 2010-01-22 11:50:40 -07:00 · 636cfbb769
commit 636cfbb769
parent 6a8b4f9a21 3a1d39422a
8 changed files with 595 additions and 477 deletions
--- a/Changelog.yaml
+++ b/Changelog.yaml
@@ -119,6 +119,8 @@
    - title: stuff.co.nz
      author: Krittika Goyal
    - title: Editor and Publisher
      author: XanthanGum
  improved recipes:
    - Physics Today
@@ -127,6 +129,7 @@
    - FTD
    - The National Post
    - Blic
    - Ars Technica
 - version: 0.6.34
--- a/resources/recipes/ars_technica.recipe
+++ b/resources/recipes/ars_technica.recipe
@@ -1,12 +1,12 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
-__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
 arstechnica.com
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
 class ArsTechnica2(BasicNewsRecipe):
    title                 = u'Ars Technica'
@@ -18,24 +18,24 @@ class ArsTechnica2(BasicNewsRecipe):
    oldest_article        = 2
    max_articles_per_feed = 100
    no_stylesheets        = True
-    encoding              = 'utf8'
+    encoding              = 'utf-8'
    remove_javascript     = True
    use_embedded_content  = False
    extra_css             = ' body {font-family: sans-serif} .byline{font-weight: bold; line-height: 1em; font-size: 0.625em; text-decoration: none} '
-    extra_css = '''
+    conversion_options = {
-                    .news-item-title{font-size: medium ;font-family:Arial,Helvetica,sans-serif; font-weight:bold;}
+                             'comments'  : description
-                    .news-item-teaser{font-size: small ;font-family:Arial,Helvetica,sans-serif; font-weight:bold;}
+                            ,'tags'      : category
-                    .news-item-byline{font-size:xx-small; font-family:Arial,Helvetica,sans-serif;font-weight:normal;}
+                            ,'language'  : language
-                    .news-item-text{font-size:x-small;font-family:Arial,Helvetica,sans-serif;}
+                            ,'publisher' : publisher
-                    .news-item-figure-caption-text{font-size:xx-small; font-family:Arial,Helvetica,sans-serif;font-weight:bold;}
+                         }
                    .news-item-figure-caption-byline{font-size:xx-small; font-family:Arial,Helvetica,sans-serif;font-weight:normal;}
                '''
-    keep_only_tags = [dict(name='div', attrs={'id':['news-item-info','news-item']})]
+
    keep_only_tags = [dict(name='div', attrs={'id':['story','etc-story']})]
    remove_tags = [
                     dict(name=['object','link','embed'])
-                    ,dict(name='div', attrs={'class':'related-stories'})
+                    ,dict(name='div', attrs={'class':'read-more-link'})
                  ]
@@ -52,14 +52,19 @@ class ArsTechnica2(BasicNewsRecipe):
            ]
    def append_page(self, soup, appendtag, position):
-        pager = soup.find('div',attrs={'id':'pager'})
+        pager = soup.find('div',attrs={'class':'pager'})
        if pager:
           for atag in pager.findAll('a',href=True):
               str = self.tag_to_string(atag)
               if str.startswith('Next'):
-                  soup2 = self.index_to_soup(atag['href'])
+                  nurl = 'http://arstechnica.com' + atag['href']
                  rawc = self.index_to_soup(nurl,True)
                  soup2 = BeautifulSoup(rawc, fromEncoding=self.encoding)
-                  texttag = soup2.find('div', attrs={'class':'news-item-text'})
+                  readmoretag = soup2.find('div', attrs={'class':'read-more-link'})
                  if readmoretag:
                     readmoretag.extract()
                  texttag = soup2.find('div', attrs={'class':'body'})
                  for it in texttag.findAll(style=True):
                      del it['style']
@@ -71,10 +76,12 @@ class ArsTechnica2(BasicNewsRecipe):
    def preprocess_html(self, soup):
-
+        ftag = soup.find('div', attrs={'class':'byline'})
        ftag = soup.find('div', attrs={'class':'news-item-byline'})
        if ftag:
-           ftag.insert(4,'<br /><br />')
+           brtag = Tag(soup,'br')
           brtag2 = Tag(soup,'br')
           ftag.insert(4,brtag)
           ftag.insert(5,brtag2)
        for item in soup.findAll(style=True):
           del item['style']
@@ -83,5 +90,7 @@ class ArsTechnica2(BasicNewsRecipe):
        return soup
    def get_article_url(self, article):
        return article.get('feedburner_origlink',  None).rpartition('?')[0]
--- a/resources/recipes/editor_and_publisher.recipe
+++ b/resources/recipes/editor_and_publisher.recipe
@@ -0,0 +1,34 @@
 import re
 from calibre.web.feeds.news import BasicNewsRecipe
 class EandP(BasicNewsRecipe):
    title              = u'Editor and Publisher'
    __author__         = u'Xanthan Gum'
    description        = 'News about newspapers and journalism.'
    language = 'en'
    no_stylesheets     = True
    oldest_article = 7
    max_articles_per_feed = 100
    # Font formatting code borrowed from kwetal
    extra_css = '''
                 body{font-family:verdana,arial,helvetica,geneva,sans-serif ;}
                 h1{font-size: xx-large;}
                 h2{font-size: large;}
                '''
    # Delete everything before the article
    remove_tags_before = dict(name='font', attrs={'class':'titlebar_black'})
    # Delete everything after the article
    preprocess_regexps = [(re.compile(r'<!--endclickprintinclude-->.*</body>', re.DOTALL|re.IGNORECASE),
                           lambda match: '</body>'),]
    feeds = [(u'Breaking News', u'http://feeds.feedburner.com/EditorAndPublisher-BreakingNews'),
             (u'Business News', u'http://feeds.feedburner.com/EditorAndPublisher-BusinessNews'),
             (u'Newsroom', u'http://feeds.feedburner.com/EditorAndPublisher-Newsroom'),
             (u'Technology News', u'http://feeds.feedburner.com/EditorAndPublisher-Technology'),
             (u'Syndicates News', u'http://feeds.feedburner.com/EditorAndPublisher-Syndicates')]
--- a/src/calibre/gui2/catalog/catalog_epub_mobi.py
+++ b/src/calibre/gui2/catalog/catalog_epub_mobi.py
@@ -34,23 +34,19 @@ class PluginWidget(QWidget,Ui_Form):
    def initialize(self, name):
        self.name = name
        # Restore options from last use here
        print "gui2.catalog.catalog_epub_mobi:initialize(): Retrieving options"
        for opt in self.OPTION_FIELDS:
-            opt_value = gprefs[self.name + '_' + opt[0]]
+            opt_value = gprefs.get(self.name + '_' + opt[0], opt[1])
-            print "Restoring %s: %s" % (self.name + '_' + opt[0], opt_value)
+            getattr(self, opt[0]).setText(opt_value)
            setattr(self,opt[0], unicode(opt_value))
    def options(self):
        # Save/return the current options
        # getattr() returns text value of QLineEdit control
        print "gui2.catalog.catalog_epub_mobi:options(): Saving options"
        opts_dict = {}
        for opt in self.OPTION_FIELDS:
-            opt_value = unicode(getattr(self,opt[0]))
+            opt_value = unicode(getattr(self, opt[0]).text())
            print "writing %s to gprefs" % opt_value
            gprefs.set(self.name + '_' + opt[0], opt_value)
-            opts_dict[opt[0]] = opt_value.split(',')
+            if opt[0] == 'exclude_tags':
                opt_value = opt_value.split(',')
            opts_dict[opt[0]] = opt_value
        opts_dict['output_profile'] = [load_defaults('page_setup')['output_profile']]
--- a/src/calibre/gui2/catalog/catalog_epub_mobi.ui
+++ b/src/calibre/gui2/catalog/catalog_epub_mobi.ui
@@ -13,6 +13,7 @@
  <property name="windowTitle">
   <string>Form</string>
  </property>
 <<<<<<< TREE
  <widget class="QLabel" name="label">
   <property name="geometry">
    <rect>
@@ -158,6 +159,85 @@
    <set>Qt::AlignCenter</set>
   </property>
  </widget>
 =======
  <layout class="QGridLayout" name="gridLayout">
   <item row="0" column="0">
    <widget class="QLabel" name="label">
     <property name="text">
      <string>Tags to exclude as genres (regex):</string>
     </property>
     <property name="textFormat">
      <enum>Qt::LogText</enum>
     </property>
     <property name="wordWrap">
      <bool>true</bool>
     </property>
    </widget>
   </item>
   <item row="1" column="0">
    <widget class="QLabel" name="label_2">
     <property name="text">
      <string>'Don't include this book' tag:</string>
     </property>
    </widget>
   </item>
   <item row="1" column="1">
    <widget class="QLineEdit" name="exclude_tags">
     <property name="toolTip">
      <string extracomment="Tooltip comment here"/>
     </property>
    </widget>
   </item>
   <item row="2" column="0">
    <widget class="QLabel" name="label_3">
     <property name="text">
      <string>'Mark this book as read' tag:</string>
     </property>
    </widget>
   </item>
   <item row="2" column="1">
    <widget class="QLineEdit" name="read_tag">
     <property name="toolTip">
      <string extracomment="Tooltip comment here"/>
     </property>
    </widget>
   </item>
   <item row="3" column="0">
    <widget class="QLabel" name="label_4">
     <property name="text">
      <string>Additional note tag prefix:</string>
     </property>
    </widget>
   </item>
   <item row="3" column="1">
    <widget class="QLineEdit" name="note_tag">
     <property name="toolTip">
      <string extracomment="Tooltip comment here"/>
     </property>
    </widget>
   </item>
   <item row="4" column="0">
    <spacer name="verticalSpacer">
     <property name="orientation">
      <enum>Qt::Vertical</enum>
     </property>
     <property name="sizeHint" stdset="0">
      <size>
       <width>20</width>
       <height>40</height>
      </size>
     </property>
    </spacer>
   </item>
   <item row="0" column="1">
    <widget class="QLineEdit" name="exclude_genre">
     <property name="toolTip">
      <string extracomment="Tooltip comment here"/>
     </property>
    </widget>
   </item>
  </layout>
 >>>>>>> MERGE-SOURCE
 </widget>
 <resources/>
 <connections/>
--- a/src/calibre/library/catalog.py
+++ b/src/calibre/library/catalog.py
@@ -1,4 +1,4 @@
-import pickle, os, re, shutil
+import pickle, os, re, shutil, htmlentitydefs
 from xml.sax.saxutils import escape
@@ -291,7 +291,6 @@ class EPUB_MOBI(CatalogPlugin):
            # intToTranslate is a three-digit number
            tensComponentString = ""
            hundredsComponenetString = ""
            hundredsComponent = intToTranslate - (intToTranslate % 100)
            tensComponent = intToTranslate % 100
@@ -1366,7 +1365,7 @@ class EPUB_MOBI(CatalogPlugin):
                        # Check to see if cover is newer than thumbnail
                        # os.path.getmtime() = modified time
                        # os.path.ctime() = creation time
-                        cover_timestamp = os.path.getmtime(cover)
+                        cover_timestamp = os.path.getmtime(title['cover'])
                        thumb_timestamp = os.path.getmtime(thumb_fp)
                        if thumb_timestamp < cover_timestamp:
                            # if verbose: print "updating thumbnail for %s" % title['title']
@@ -1470,7 +1469,6 @@ class EPUB_MOBI(CatalogPlugin):
            spine = soup.find('spine')
            stc = 0
            guide = soup.find('guide')
            gtc = 0
            itemTag = Tag(soup, "item")
            itemTag['id'] = "ncx"
@@ -1796,7 +1794,6 @@ class EPUB_MOBI(CatalogPlugin):
                print self.updateProgressFullStep("generateNCXByAuthor()")
            soup = self.ncxSoup
            output = "ByAlphaAuthor"
            HTML_file = "content/ByAlphaAuthor.html"
            body = soup.find("navPoint")
            btc = len(body.contents)
@@ -2036,7 +2033,6 @@ class EPUB_MOBI(CatalogPlugin):
                    content = node
            # Special handling for '&amp;' in 'cover'
            if key == 'cover' and re.search('&amp;',content):
                hit =  re.search('&amp;',content)
                content = re.sub('&amp;','&',content)
            if content:
--- a/src/calibre/translations/nb.po
+++ b/src/calibre/translations/nb.po
@@ -7652,7 +7652,7 @@ msgstr ""
 "Tilgjengelige områder: alle, forfattersortering, forfattere, kommentarer, "
 "omslagsbilde, formater, id, isbn, publikasjonsdato, bedømmelse, "
 "serieinndeks, serier, størrelse, tidsmerker, tittel, uuid.\n"
-"Standard: %standard\n"
+"Standard: %default\n"
 "Gjelder: CSV, XML utdataformater"
 #: /home/kovid/work/calibre/src/calibre/library/catalog.py:34
@@ -7665,7 +7665,7 @@ msgstr ""
 "Utdatafelter som kan sorteres.\n"
 "Tilgjengelige områder: forfattersortering, id, bedømmelse, størrelse, "
 "tidsmerking, tittel.\n"
-"Standard: %standard'\n"
+"Standard: '%default'\n"
 "Gjelder: CSV, XML utdataformater"
 #: /home/kovid/work/calibre/src/calibre/library/cli.py:121