KG changes + GwR changes

Author: GRiker
Date:   2010-06-18 06:35:31 -06:00
Commit: 58e466f830
8 changed files with 91 additions and 58 deletions

View File

@@ -64,6 +64,7 @@ class NYTimes(BasicNewsRecipe):
     timefmt = ''
     needs_subscription = True
     masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'
+    cover_margins = (18,18,'grey99')
     remove_tags_before = dict(id='article')
     remove_tags_after = dict(id='article')
@@ -183,6 +184,16 @@ class NYTimes(BasicNewsRecipe):
             self.log("\nFailed to login")
         return br
 
+    def skip_ad_pages(self, soup):
+        # Skip ad pages served before actual article
+        skip_tag = soup.find(True, {'name':'skip'})
+        if skip_tag is not None:
+            self.log.warn("Found forwarding link: %s" % skip_tag.parent['href'])
+            url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href'])
+            url += '?pagewanted=all'
+            self.log.warn("Skipping ad to article at '%s'" % url)
+            return self.index_to_soup(url, raw=True)
+
     def get_cover_url(self):
         cover = None
         st = time.localtime()
@@ -391,14 +402,6 @@ class NYTimes(BasicNewsRecipe):
         return ans
 
     def preprocess_html(self, soup):
-        # Skip ad pages served before actual article
-        skip_tag = soup.find(True, {'name':'skip'})
-        if skip_tag is not None:
-            self.log.error("Found forwarding link: %s" % skip_tag.parent['href'])
-            url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href'])
-            url += '?pagewanted=all'
-            self.log.error("Skipping ad to article at '%s'" % url)
-            soup = self.index_to_soup(url)
         return self.strip_anchors(soup)
 
     def postprocess_html(self,soup, True):
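
A note on the hook used above: this commit moves the interstitial-ad handling out of preprocess_html and into calibre's skip_ad_pages, which BasicNewsRecipe calls on each downloaded page before any cleanup attributes are applied. A non-None return value replaces the downloaded page (here the raw article HTML, hence raw=True), while returning None keeps the page as-is. A minimal sketch of the pattern, with a placeholder recipe name:

import re
from calibre.web.feeds.news import BasicNewsRecipe

class ExampleRecipe(BasicNewsRecipe):
    title = 'Example'  # placeholder; only the hook body mirrors the commit

    def skip_ad_pages(self, soup):
        # Interstitials carry a tag with name="skip" whose parent links
        # to the real article.
        skip_tag = soup.find(True, {'name':'skip'})
        if skip_tag is None:
            return None  # not an ad page, keep the downloaded HTML
        url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href'])
        # raw=True returns the raw HTML, which then flows through the
        # normal cleanup pipeline in place of the ad page.
        return self.index_to_soup(url + '?pagewanted=all', raw=True)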

View File

@@ -103,6 +103,7 @@ class NYTimes(BasicNewsRecipe):
                 ]),
             dict(name=['script', 'noscript', 'style'])]
     masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'
+    cover_margins = (18,18,'grey99')
     no_stylesheets = True
     extra_css = '.headline {text-align: left;}\n \
                  .byline {font-family: monospace; \
@@ -158,7 +159,7 @@ class NYTimes(BasicNewsRecipe):
         return cover
 
     def get_masthead_title(self):
-        return 'NYTimes GR Version'
+        return self.title
 
     def dump_ans(self, ans):
         total_article_count = 0
@@ -279,15 +280,17 @@ class NYTimes(BasicNewsRecipe):
         self.dump_ans(ans)
         return ans
 
-    def preprocess_html(self, soup):
+    def skip_ad_pages(self, soup):
         # Skip ad pages served before actual article
         skip_tag = soup.find(True, {'name':'skip'})
         if skip_tag is not None:
-            self.log.error("Found forwarding link: %s" % skip_tag.parent['href'])
+            self.log.warn("Found forwarding link: %s" % skip_tag.parent['href'])
             url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href'])
             url += '?pagewanted=all'
-            self.log.error("Skipping ad to article at '%s'" % url)
-            soup = self.index_to_soup(url)
+            self.log.warn("Skipping ad to article at '%s'" % url)
+            return self.index_to_soup(url, raw=True)
+
+    def preprocess_html(self, soup):
         return self.strip_anchors(soup)
 
     def postprocess_html(self,soup, True):

View File

@@ -3,7 +3,6 @@ __license__ = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 
-import string
 
 from calibre.web.feeds.news import BasicNewsRecipe
 
 # http://online.wsj.com/page/us_in_todays_paper.html
@@ -66,7 +65,7 @@ class WallStreetJournal(BasicNewsRecipe):
         return soup
 
     def wsj_get_index(self):
-        return self.index_to_soup('http://online.wsj.com/page/us_in_todays_paper.html')
+        return self.index_to_soup('http://online.wsj.com/itp')
 
     def parse_index(self):
         soup = self.wsj_get_index()
@@ -75,24 +74,35 @@ class WallStreetJournal(BasicNewsRecipe):
         if date is not None:
             self.timefmt = ' [%s]'%self.tag_to_string(date)
 
-        sections = {}
-        sec_order = []
+        cov = soup.find('a', attrs={'class':'icon pdf'}, href=True)
+        if cov is not None:
+            self.cover_url = cov['href']
+
+        feeds = []
+        div = soup.find('div', attrs={'class':'itpHeader'})
+        div = div.find('ul', attrs={'class':'tab'})
+        for a in div.findAll('a', href=lambda x: x and '/itp/' in x):
+            title = self.tag_to_string(a)
+            url = 'http://online.wsj.com' + a['href']
+            self.log('Found section:', title)
+            articles = self.wsj_find_articles(url)
+            if articles:
+                feeds.append((title, articles))
+        return feeds
+
+    def wsj_find_articles(self, url):
+        soup = self.index_to_soup(url)
+
+        whats_news = soup.find('div', attrs={'class':lambda x: x and
+            'whatsNews-simple' in x})
+        if whats_news is not None:
+            whats_news.extract()
+
+        articles = []
         for a in soup.findAll('a', attrs={'class':'mjLinkItem'}, href=True):
             container = a.findParent(['li', 'div'])
-            if container.name == 'div':
-                section = 'Page One'
-            else:
-                section = ''
-                sec = container.find('a', href=lambda x: x and '/search?' in x)
-                if sec is not None:
-                    section = self.tag_to_string(sec).strip()
-                if not section:
-                    h = container.find(['h1','h2','h3','h4','h5','h6'])
-                    section = self.tag_to_string(h)
-                section = string.capitalize(section).replace('U.s.', 'U.S.')
-            if section not in sections:
-                sections[section] = []
-                sec_order.append(section)
             meta = a.find(attrs={'class':'meta_sectionName'})
             if meta is not None:
                 meta.extract()
@@ -103,27 +113,28 @@ class WallStreetJournal(BasicNewsRecipe):
             if p is not None:
                 desc = self.tag_to_string(p)
 
-            sections[section].append({'title':title, 'url':url,
+            articles.append({'title':title, 'url':url,
                 'description':desc, 'date':''})
 
-            self.log('Found article:', title)
+            self.log('\tFound article:', title)
 
             '''
             # Find related articles
             a.extract()
             for a in container.findAll('a', href=lambda x: x and '/article/'
-                    in x):
+                    in x and 'articleTabs' not in x):
                 url = a['href']
                 if not url.startswith('http:'):
                     url = 'http://online.wsj.com'+url
                 title = self.tag_to_string(a).strip()
                 if not title or title.startswith('['): continue
                 if title:
-                    sections[section].append({'title':self.tag_to_string(a),
+                    articles.append({'title':self.tag_to_string(a),
                         'url':url, 'description':'', 'date':''})
-                    self.log('\tFound related:', title)
+                    self.log('\t\tFound related:', title)
             '''
 
-        feeds = [(sec, sections[sec]) for sec in sec_order]
-        return feeds
+        return articles
 
     def cleanup(self):
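
The WSJ rewrite splits indexing in two: parse_index now only walks the section tabs on the /itp page and delegates each section to the new wsj_find_articles. The return value keeps the shape calibre expects from parse_index, a list of (section title, article list) pairs. Roughly, with placeholder values:

# Illustrative shape only; every title/URL below is invented.
feeds = [
    ('Page One', [
        {'title': 'A headline',
         'url': 'http://online.wsj.com/article/example',
         'description': 'Summary paragraph, when one is present.',
         'date': ''},
    ]),
    ('Opinion', []),  # one (title, articles) pair per /itp section tab
]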

View File

@@ -279,6 +279,7 @@ class KoboReaderOutput(OutputProfile):
     description = _('This profile is intended for the Kobo Reader.')
 
     screen_size = (590, 775)
     comic_screen_size = (540, 718)
+    dpi = 168.451
     fbase = 12
     fsizes = [7.5, 9, 10, 12, 15.5, 20, 22, 24]
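
The new dpi value lets conversion code relate physical lengths to Kobo screen pixels, under the usual 72-points-per-inch convention. A quick worked example (the helper is illustrative, not a calibre API):

def pt_to_px(pt, dpi=168.451):
    # points -> pixels at the profile's resolution
    return pt * dpi / 72.0

base_px = pt_to_px(12)  # the 12 pt fbase is ~28 px on this screen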

View File

@@ -8,7 +8,7 @@ import os, re
 
 from mimetypes import guess_type as guess_mimetype
 from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString
+from calibre.constants import iswindows
 from calibre.utils.chm.chm import CHMFile
 from calibre.utils.chm.chmlib import (
         CHM_RESOLVE_SUCCESS, CHM_ENUMERATE_NORMAL,
@@ -135,10 +135,16 @@ class CHMReader(CHMFile):
             if lpath.find(';') != -1:
                 # fix file names with ";<junk>" at the end, see _reformat()
                 lpath = lpath.split(';')[0]
-            with open(lpath, 'wb') as f:
-                if guess_mimetype(path)[0] == ('text/html'):
-                    data = self._reformat(data)
-                f.write(data)
+            try:
+                with open(lpath, 'wb') as f:
+                    if guess_mimetype(path)[0] == ('text/html'):
+                        data = self._reformat(data)
+                    f.write(data)
+            except:
+                if iswindows and len(lpath) > 250:
+                    self.log.warn('%r filename too long, skipping'%path)
+                    continue
+                raise
         self._extracted = True
         files = os.listdir(output_dir)
         if self.hhc_path not in files:
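
The new try/except works around the Windows path-length limit (MAX_PATH, 260 characters): when a CHM entry extracts to an over-long path, open() fails, and the reader now logs and skips that entry instead of aborting the whole extraction. The guard in isolation, with illustrative names and the commit's bare except narrowed to EnvironmentError:

import os

MAX_SAFE_PATH = 250  # just under the Windows MAX_PATH limit of 260

def write_entry(lpath, data, log, iswindows=(os.name == 'nt')):
    try:
        with open(lpath, 'wb') as f:
            f.write(data)
    except EnvironmentError:
        if iswindows and len(lpath) > MAX_SAFE_PATH:
            log.warn('%r filename too long, skipping' % lpath)
            return False  # the commit uses continue inside its loop
        raise
    return True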

View File

@@ -689,14 +689,28 @@ class DeviceMixin(object): # {{{
             self.device_error_dialog.show()
 
     # Device connected {{{
-    def device_detected(self, connected, is_folder_device):
-        '''
-        Called when a device is connected to the computer.
-        '''
+    def set_device_menu_items_state(self, connected, is_folder_device):
         if connected:
             self._sync_menu.connect_to_folder_action.setEnabled(False)
             if is_folder_device:
                 self._sync_menu.disconnect_from_folder_action.setEnabled(True)
+            self._sync_menu.enable_device_actions(True,
+                    self.device_manager.device.card_prefix(),
+                    self.device_manager.device)
+            self.eject_action.setEnabled(True)
+        else:
+            self._sync_menu.connect_to_folder_action.setEnabled(True)
+            self._sync_menu.disconnect_from_folder_action.setEnabled(False)
+            self._sync_menu.enable_device_actions(False)
+            self.eject_action.setEnabled(False)
+
+    def device_detected(self, connected, is_folder_device):
+        '''
+        Called when a device is connected to the computer.
+        '''
+        self.set_device_menu_items_state(connected, is_folder_device)
+        if connected:
             self.device_manager.get_device_information(\
                     Dispatcher(self.info_read))
             self.set_default_thumbnail(\
@@ -705,17 +719,10 @@ class DeviceMixin(object): # {{{
                     self.device_manager.device.__class__.get_gui_name()+\
                             _(' detected.'), 3000)
             self.device_connected = 'device' if not is_folder_device else 'folder'
-            self._sync_menu.enable_device_actions(True,
-                    self.device_manager.device.card_prefix(),
-                    self.device_manager.device)
             self.location_view.model().device_connected(self.device_manager.device)
-            self.eject_action.setEnabled(True)
             self.refresh_ondevice_info (device_connected = True, reset_only = True)
         else:
-            self._sync_menu.connect_to_folder_action.setEnabled(True)
-            self._sync_menu.disconnect_from_folder_action.setEnabled(False)
             self.device_connected = None
-            self._sync_menu.enable_device_actions(False)
             self.location_view.model().update_devices()
             self.vanity.setText(self.vanity_template%\
                     dict(version=self.latest_version, device=' '))
@@ -723,7 +730,6 @@ class DeviceMixin(object): # {{{
             if self.current_view() != self.library_view:
                 self.book_details.reset_info()
             self.location_view.setCurrentIndex(self.location_view.model().index(0))
-            self.eject_action.setEnabled(False)
             self.refresh_ondevice_info (device_connected = False)
 
     def info_read(self, job):
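
The device.py hunks are an extract-method refactor: the enabling/disabling of the connect-to-folder, device, and eject actions moves out of device_detected into set_device_menu_items_state, so the menu state can be resynchronized outside of a real connect/disconnect event, which is what the ui.py hunk below does after rebuilding the device menu. After the refactor device_detected reduces to roughly this shape (bodies abbreviated):

def device_detected(self, connected, is_folder_device):
    # menu/eject state first, then the heavier connect/disconnect work
    self.set_device_menu_items_state(connected, is_folder_device)
    if connected:
        pass  # fetch device info, set thumbnail, update location view
    else:
        pass  # clear device state and refresh the library view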

View File

@@ -410,6 +410,8 @@ class Main(MainWindow, Ui_MainWindow, DeviceMixin, ToolbarMixin, # {{{
             self.tags_view.set_new_model() # in case columns changed
             self.tags_view.recount()
             self.create_device_menu()
+            self.set_device_menu_items_state(bool(self.device_connected),
+                    self.device_connected == 'folder')
             if not patheq(self.library_path, d.database_location):
                 newloc = d.database_location

View File

@@ -596,10 +596,11 @@ class DNSIncoming(object):
                     next = off + 1
                 off = ((len & 0x3F) << 8) | ord(self.data[off])
                 if off >= first:
-                    raise 'Bad domain name (circular) at ' + str(off)
+                    raise ValueError('Bad domain name (circular) at ' +
+                            str(off))
                 first = off
             else:
-                raise 'Bad domain name at ' + str(off)
+                raise ValueError('Bad domain name at ' + str(off))
         if next >= 0:
             self.offset = next
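
The zeroconf fix replaces 1.x-style string exceptions with ValueError. String exceptions were deprecated in Python 2.5 and removed in 2.6, where raise 'message' itself fails with a TypeError, so the old error text could never surface there. The new form is catchable in the ordinary way; a sketch, where readName is assumed to be the DNSIncoming method containing these raises:

def read_packet_name(incoming):
    # 'incoming' stands in for a DNSIncoming instance
    try:
        return incoming.readName()
    except ValueError, e:
        # with a string exception this was uncatchable on Python 2.6+
        print 'Dropping malformed mDNS packet: %s' % e
        return None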