diff --git a/resources/recipes/nytimes.recipe b/resources/recipes/nytimes.recipe index ec546569e5..62e233add9 100644 --- a/resources/recipes/nytimes.recipe +++ b/resources/recipes/nytimes.recipe @@ -64,6 +64,7 @@ class NYTimes(BasicNewsRecipe): timefmt = '' needs_subscription = True masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif' + cover_margins = (18,18,'grey99') remove_tags_before = dict(id='article') remove_tags_after = dict(id='article') @@ -183,6 +184,16 @@ class NYTimes(BasicNewsRecipe): self.log("\nFailed to login") return br + def skip_ad_pages(self, soup): + # Skip ad pages served before actual article + skip_tag = soup.find(True, {'name':'skip'}) + if skip_tag is not None: + self.log.warn("Found forwarding link: %s" % skip_tag.parent['href']) + url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href']) + url += '?pagewanted=all' + self.log.warn("Skipping ad to article at '%s'" % url) + return self.index_to_soup(url, raw=True) + def get_cover_url(self): cover = None st = time.localtime() @@ -391,14 +402,6 @@ class NYTimes(BasicNewsRecipe): return ans def preprocess_html(self, soup): - # Skip ad pages served before actual article - skip_tag = soup.find(True, {'name':'skip'}) - if skip_tag is not None: - self.log.error("Found forwarding link: %s" % skip_tag.parent['href']) - url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href']) - url += '?pagewanted=all' - self.log.error("Skipping ad to article at '%s'" % url) - soup = self.index_to_soup(url) return self.strip_anchors(soup) def postprocess_html(self,soup, True): diff --git a/resources/recipes/nytimes_sub.recipe b/resources/recipes/nytimes_sub.recipe index c08b06572d..7fac771dd7 100644 --- a/resources/recipes/nytimes_sub.recipe +++ b/resources/recipes/nytimes_sub.recipe @@ -103,6 +103,7 @@ class NYTimes(BasicNewsRecipe): ]), dict(name=['script', 'noscript', 'style'])] masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif' + cover_margins = (18,18,'grey99') no_stylesheets = True extra_css = '.headline {text-align: left;}\n \ .byline {font-family: monospace; \ @@ -158,7 +159,7 @@ class NYTimes(BasicNewsRecipe): return cover def get_masthead_title(self): - return 'NYTimes GR Version' + return self.title def dump_ans(self, ans): total_article_count = 0 @@ -279,15 +280,17 @@ class NYTimes(BasicNewsRecipe): self.dump_ans(ans) return ans - def preprocess_html(self, soup): + def skip_ad_pages(self, soup): # Skip ad pages served before actual article skip_tag = soup.find(True, {'name':'skip'}) if skip_tag is not None: - self.log.error("Found forwarding link: %s" % skip_tag.parent['href']) + self.log.warn("Found forwarding link: %s" % skip_tag.parent['href']) url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href']) url += '?pagewanted=all' - self.log.error("Skipping ad to article at '%s'" % url) - soup = self.index_to_soup(url) + self.log.warn("Skipping ad to article at '%s'" % url) + return self.index_to_soup(url, raw=True) + + def preprocess_html(self, soup): return self.strip_anchors(soup) def postprocess_html(self,soup, True): diff --git a/resources/recipes/wsj.recipe b/resources/recipes/wsj.recipe index e8e29505c4..2e99a690f4 100644 --- a/resources/recipes/wsj.recipe +++ b/resources/recipes/wsj.recipe @@ -3,7 +3,6 @@ __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' __docformat__ = 'restructuredtext en' -import string from calibre.web.feeds.news import BasicNewsRecipe # http://online.wsj.com/page/us_in_todays_paper.html @@ -66,7 +65,7 @@ class WallStreetJournal(BasicNewsRecipe): return soup def wsj_get_index(self): - return self.index_to_soup('http://online.wsj.com/page/us_in_todays_paper.html') + return self.index_to_soup('http://online.wsj.com/itp') def parse_index(self): soup = self.wsj_get_index() @@ -75,24 +74,35 @@ class WallStreetJournal(BasicNewsRecipe): if date is not None: self.timefmt = ' [%s]'%self.tag_to_string(date) - sections = {} - sec_order = [] + cov = soup.find('a', attrs={'class':'icon pdf'}, href=True) + if cov is not None: + self.cover_url = cov['href'] + + feeds = [] + div = soup.find('div', attrs={'class':'itpHeader'}) + div = div.find('ul', attrs={'class':'tab'}) + for a in div.findAll('a', href=lambda x: x and '/itp/' in x): + title = self.tag_to_string(a) + url = 'http://online.wsj.com' + a['href'] + self.log('Found section:', title) + articles = self.wsj_find_articles(url) + if articles: + feeds.append((title, articles)) + + return feeds + + def wsj_find_articles(self, url): + soup = self.index_to_soup(url) + + whats_news = soup.find('div', attrs={'class':lambda x: x and + 'whatsNews-simple' in x}) + if whats_news is not None: + whats_news.extract() + + articles = [] + for a in soup.findAll('a', attrs={'class':'mjLinkItem'}, href=True): container = a.findParent(['li', 'div']) - if container.name == 'div': - section = 'Page One' - else: - section = '' - sec = container.find('a', href=lambda x: x and '/search?' in x) - if sec is not None: - section = self.tag_to_string(sec).strip() - if not section: - h = container.find(['h1','h2','h3','h4','h5','h6']) - section = self.tag_to_string(h) - section = string.capitalize(section).replace('U.s.', 'U.S.') - if section not in sections: - sections[section] = [] - sec_order.append(section) meta = a.find(attrs={'class':'meta_sectionName'}) if meta is not None: meta.extract() @@ -103,27 +113,28 @@ class WallStreetJournal(BasicNewsRecipe): if p is not None: desc = self.tag_to_string(p) - sections[section].append({'title':title, 'url':url, + articles.append({'title':title, 'url':url, 'description':desc, 'date':''}) - self.log('Found article:', title) + self.log('\tFound article:', title) + ''' + # Find related articles a.extract() for a in container.findAll('a', href=lambda x: x and '/article/' - in x): + in x and 'articleTabs' not in x): url = a['href'] if not url.startswith('http:'): url = 'http://online.wsj.com'+url title = self.tag_to_string(a).strip() if not title or title.startswith('['): continue if title: - sections[section].append({'title':self.tag_to_string(a), + articles.append({'title':self.tag_to_string(a), 'url':url, 'description':'', 'date':''}) - self.log('\tFound related:', title) + self.log('\t\tFound related:', title) + ''' - - feeds = [(sec, sections[sec]) for sec in sec_order] - return feeds + return articles def cleanup(self): diff --git a/src/calibre/customize/profiles.py b/src/calibre/customize/profiles.py index 222f6128e0..c872c9df38 100644 --- a/src/calibre/customize/profiles.py +++ b/src/calibre/customize/profiles.py @@ -279,6 +279,7 @@ class KoboReaderOutput(OutputProfile): description = _('This profile is intended for the Kobo Reader.') screen_size = (590, 775) + comic_screen_size = (540, 718) dpi = 168.451 fbase = 12 fsizes = [7.5, 9, 10, 12, 15.5, 20, 22, 24] diff --git a/src/calibre/ebooks/chm/reader.py b/src/calibre/ebooks/chm/reader.py index bbb43af567..d0a81e8e7f 100644 --- a/src/calibre/ebooks/chm/reader.py +++ b/src/calibre/ebooks/chm/reader.py @@ -8,7 +8,7 @@ import os, re from mimetypes import guess_type as guess_mimetype from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString - +from calibre.constants import iswindows from calibre.utils.chm.chm import CHMFile from calibre.utils.chm.chmlib import ( CHM_RESOLVE_SUCCESS, CHM_ENUMERATE_NORMAL, @@ -135,10 +135,16 @@ class CHMReader(CHMFile): if lpath.find(';') != -1: # fix file names with ";" at the end, see _reformat() lpath = lpath.split(';')[0] - with open(lpath, 'wb') as f: - if guess_mimetype(path)[0] == ('text/html'): - data = self._reformat(data) - f.write(data) + try: + with open(lpath, 'wb') as f: + if guess_mimetype(path)[0] == ('text/html'): + data = self._reformat(data) + f.write(data) + except: + if iswindows and len(lpath) > 250: + self.log.warn('%r filename too long, skipping'%path) + continue + raise self._extracted = True files = os.listdir(output_dir) if self.hhc_path not in files: diff --git a/src/calibre/gui2/device.py b/src/calibre/gui2/device.py index d00dd2782c..07b5063e6c 100644 --- a/src/calibre/gui2/device.py +++ b/src/calibre/gui2/device.py @@ -689,14 +689,28 @@ class DeviceMixin(object): # {{{ self.device_error_dialog.show() # Device connected {{{ - def device_detected(self, connected, is_folder_device): - ''' - Called when a device is connected to the computer. - ''' + + def set_device_menu_items_state(self, connected, is_folder_device): if connected: self._sync_menu.connect_to_folder_action.setEnabled(False) if is_folder_device: self._sync_menu.disconnect_from_folder_action.setEnabled(True) + self._sync_menu.enable_device_actions(True, + self.device_manager.device.card_prefix(), + self.device_manager.device) + self.eject_action.setEnabled(True) + else: + self._sync_menu.connect_to_folder_action.setEnabled(True) + self._sync_menu.disconnect_from_folder_action.setEnabled(False) + self._sync_menu.enable_device_actions(False) + self.eject_action.setEnabled(False) + + def device_detected(self, connected, is_folder_device): + ''' + Called when a device is connected to the computer. + ''' + self.set_device_menu_items_state(connected, is_folder_device) + if connected: self.device_manager.get_device_information(\ Dispatcher(self.info_read)) self.set_default_thumbnail(\ @@ -705,17 +719,10 @@ class DeviceMixin(object): # {{{ self.device_manager.device.__class__.get_gui_name()+\ _(' detected.'), 3000) self.device_connected = 'device' if not is_folder_device else 'folder' - self._sync_menu.enable_device_actions(True, - self.device_manager.device.card_prefix(), - self.device_manager.device) self.location_view.model().device_connected(self.device_manager.device) - self.eject_action.setEnabled(True) self.refresh_ondevice_info (device_connected = True, reset_only = True) else: - self._sync_menu.connect_to_folder_action.setEnabled(True) - self._sync_menu.disconnect_from_folder_action.setEnabled(False) self.device_connected = None - self._sync_menu.enable_device_actions(False) self.location_view.model().update_devices() self.vanity.setText(self.vanity_template%\ dict(version=self.latest_version, device=' ')) @@ -723,7 +730,6 @@ class DeviceMixin(object): # {{{ if self.current_view() != self.library_view: self.book_details.reset_info() self.location_view.setCurrentIndex(self.location_view.model().index(0)) - self.eject_action.setEnabled(False) self.refresh_ondevice_info (device_connected = False) def info_read(self, job): diff --git a/src/calibre/gui2/ui.py b/src/calibre/gui2/ui.py index aa2d94a637..6452890883 100644 --- a/src/calibre/gui2/ui.py +++ b/src/calibre/gui2/ui.py @@ -410,6 +410,8 @@ class Main(MainWindow, Ui_MainWindow, DeviceMixin, ToolbarMixin, # {{{ self.tags_view.set_new_model() # in case columns changed self.tags_view.recount() self.create_device_menu() + self.set_device_menu_items_state(bool(self.device_connected), + self.device_connected == 'folder') if not patheq(self.library_path, d.database_location): newloc = d.database_location diff --git a/src/calibre/utils/Zeroconf.py b/src/calibre/utils/Zeroconf.py index 8a1e13c23f..f4a7119d16 100755 --- a/src/calibre/utils/Zeroconf.py +++ b/src/calibre/utils/Zeroconf.py @@ -596,10 +596,11 @@ class DNSIncoming(object): next = off + 1 off = ((len & 0x3F) << 8) | ord(self.data[off]) if off >= first: - raise 'Bad domain name (circular) at ' + str(off) + raise ValueError('Bad domain name (circular) at ' + + str(off)) first = off else: - raise 'Bad domain name at ' + str(off) + raise ValueError('Bad domain name at ' + str(off)) if next >= 0: self.offset = next