KG changes + GwR changes

Author: GRiker
Date:   2010-06-18 06:35:31 -06:00
Commit: 58e466f830
8 changed files with 91 additions and 58 deletions

View File

@@ -64,6 +64,7 @@ class NYTimes(BasicNewsRecipe):
     timefmt = ''
     needs_subscription = True
     masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'
+    cover_margins = (18,18,'grey99')
     remove_tags_before = dict(id='article')
     remove_tags_after = dict(id='article')
@@ -183,6 +184,16 @@ class NYTimes(BasicNewsRecipe):
             self.log("\nFailed to login")
         return br
 
+    def skip_ad_pages(self, soup):
+        # Skip ad pages served before actual article
+        skip_tag = soup.find(True, {'name':'skip'})
+        if skip_tag is not None:
+            self.log.warn("Found forwarding link: %s" % skip_tag.parent['href'])
+            url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href'])
+            url += '?pagewanted=all'
+            self.log.warn("Skipping ad to article at '%s'" % url)
+            return self.index_to_soup(url, raw=True)
+
     def get_cover_url(self):
         cover = None
         st = time.localtime()
@@ -391,14 +402,6 @@ class NYTimes(BasicNewsRecipe):
         return ans
 
     def preprocess_html(self, soup):
-        # Skip ad pages served before actual article
-        skip_tag = soup.find(True, {'name':'skip'})
-        if skip_tag is not None:
-            self.log.error("Found forwarding link: %s" % skip_tag.parent['href'])
-            url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href'])
-            url += '?pagewanted=all'
-            self.log.error("Skipping ad to article at '%s'" % url)
-            soup = self.index_to_soup(url)
         return self.strip_anchors(soup)
 
     def postprocess_html(self,soup, True):
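
A note on the hook used above: this commit moves the interstitial-ad handling out of preprocess_html and into calibre's skip_ad_pages, which BasicNewsRecipe calls on each downloaded page before any cleanup attributes are applied. A non-None return value replaces the downloaded page (here the raw article HTML, hence raw=True), while returning None keeps the page as-is. A minimal sketch of the pattern, with a placeholder recipe name:

import re
from calibre.web.feeds.news import BasicNewsRecipe

class ExampleRecipe(BasicNewsRecipe):
    title = 'Example'  # placeholder; only the hook body mirrors the commit

    def skip_ad_pages(self, soup):
        # Interstitials carry a tag with name="skip" whose parent links
        # to the real article.
        skip_tag = soup.find(True, {'name':'skip'})
        if skip_tag is None:
            return None  # not an ad page, keep the downloaded HTML
        url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href'])
        # raw=True returns the raw HTML, which then flows through the
        # normal cleanup pipeline in place of the ad page.
        return self.index_to_soup(url + '?pagewanted=all', raw=True)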

View File

@@ -103,6 +103,7 @@ class NYTimes(BasicNewsRecipe):
                 ]),
             dict(name=['script', 'noscript', 'style'])]
     masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'
+    cover_margins = (18,18,'grey99')
     no_stylesheets = True
     extra_css = '.headline {text-align: left;}\n \
                  .byline {font-family: monospace; \
@@ -158,7 +159,7 @@ class NYTimes(BasicNewsRecipe):
         return cover
 
     def get_masthead_title(self):
-        return 'NYTimes GR Version'
+        return self.title
 
     def dump_ans(self, ans):
         total_article_count = 0
@@ -279,15 +280,17 @@ class NYTimes(BasicNewsRecipe):
         self.dump_ans(ans)
         return ans
 
-    def preprocess_html(self, soup):
+    def skip_ad_pages(self, soup):
         # Skip ad pages served before actual article
         skip_tag = soup.find(True, {'name':'skip'})
         if skip_tag is not None:
-            self.log.error("Found forwarding link: %s" % skip_tag.parent['href'])
+            self.log.warn("Found forwarding link: %s" % skip_tag.parent['href'])
             url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href'])
             url += '?pagewanted=all'
-            self.log.error("Skipping ad to article at '%s'" % url)
-            soup = self.index_to_soup(url)
+            self.log.warn("Skipping ad to article at '%s'" % url)
+            return self.index_to_soup(url, raw=True)
+
+    def preprocess_html(self, soup):
         return self.strip_anchors(soup)
 
     def postprocess_html(self,soup, True):

View File

@@ -3,7 +3,6 @@ __license__ = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 
-import string
 
 from calibre.web.feeds.news import BasicNewsRecipe
 
 # http://online.wsj.com/page/us_in_todays_paper.html
@@ -66,7 +65,7 @@ class WallStreetJournal(BasicNewsRecipe):
         return soup
 
     def wsj_get_index(self):
-        return self.index_to_soup('http://online.wsj.com/page/us_in_todays_paper.html')
+        return self.index_to_soup('http://online.wsj.com/itp')
 
     def parse_index(self):
         soup = self.wsj_get_index()
@@ -75,24 +74,35 @@ class WallStreetJournal(BasicNewsRecipe):
         if date is not None:
             self.timefmt = ' [%s]'%self.tag_to_string(date)
 
-        sections = {}
-        sec_order = []
+        cov = soup.find('a', attrs={'class':'icon pdf'}, href=True)
+        if cov is not None:
+            self.cover_url = cov['href']
+
+        feeds = []
+        div = soup.find('div', attrs={'class':'itpHeader'})
+        div = div.find('ul', attrs={'class':'tab'})
+        for a in div.findAll('a', href=lambda x: x and '/itp/' in x):
+            title = self.tag_to_string(a)
+            url = 'http://online.wsj.com' + a['href']
+            self.log('Found section:', title)
+            articles = self.wsj_find_articles(url)
+            if articles:
+                feeds.append((title, articles))
+        return feeds
+
+    def wsj_find_articles(self, url):
+        soup = self.index_to_soup(url)
+
+        whats_news = soup.find('div', attrs={'class':lambda x: x and
+            'whatsNews-simple' in x})
+        if whats_news is not None:
+            whats_news.extract()
+
+        articles = []
         for a in soup.findAll('a', attrs={'class':'mjLinkItem'}, href=True):
             container = a.findParent(['li', 'div'])
-            if container.name == 'div':
-                section = 'Page One'
-            else:
-                section = ''
-                sec = container.find('a', href=lambda x: x and '/search?' in x)
-                if sec is not None:
-                    section = self.tag_to_string(sec).strip()
-                if not section:
-                    h = container.find(['h1','h2','h3','h4','h5','h6'])
-                    section = self.tag_to_string(h)
-                section = string.capitalize(section).replace('U.s.', 'U.S.')
-            if section not in sections:
-                sections[section] = []
-                sec_order.append(section)
             meta = a.find(attrs={'class':'meta_sectionName'})
             if meta is not None:
                 meta.extract()
@@ -103,27 +113,28 @@ class WallStreetJournal(BasicNewsRecipe):
             if p is not None:
                 desc = self.tag_to_string(p)
 
-            sections[section].append({'title':title, 'url':url,
+            articles.append({'title':title, 'url':url,
                 'description':desc, 'date':''})
 
-            self.log('Found article:', title)
+            self.log('\tFound article:', title)
 
             '''
             # Find related articles
             a.extract()
             for a in container.findAll('a', href=lambda x: x and '/article/'
-                    in x):
+                    in x and 'articleTabs' not in x):
                 url = a['href']
                 if not url.startswith('http:'):
                     url = 'http://online.wsj.com'+url
                 title = self.tag_to_string(a).strip()
                 if not title or title.startswith('['): continue
                 if title:
-                    sections[section].append({'title':self.tag_to_string(a),
+                    articles.append({'title':self.tag_to_string(a),
                         'url':url, 'description':'', 'date':''})
-                    self.log('\tFound related:', title)
+                    self.log('\t\tFound related:', title)
             '''
 
-        feeds = [(sec, sections[sec]) for sec in sec_order]
-        return feeds
+        return articles
 
     def cleanup(self):
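
The WSJ rewrite splits indexing in two: parse_index now only walks the section tabs on the /itp page and delegates each section to the new wsj_find_articles. The return value keeps the shape calibre expects from parse_index, a list of (section title, article list) pairs. Roughly, with placeholder values:

# Illustrative shape only; every title/URL below is invented.
feeds = [
    ('Page One', [
        {'title': 'A headline',
         'url': 'http://online.wsj.com/article/example',
         'description': 'Summary paragraph, when one is present.',
         'date': ''},
    ]),
    ('Opinion', []),  # one (title, articles) pair per /itp section tab
]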

View File

@@ -279,6 +279,7 @@ class KoboReaderOutput(OutputProfile):
     description = _('This profile is intended for the Kobo Reader.')
 
     screen_size = (590, 775)
     comic_screen_size = (540, 718)
+    dpi = 168.451
     fbase = 12
     fsizes = [7.5, 9, 10, 12, 15.5, 20, 22, 24]
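
The new dpi value lets conversion code relate physical lengths to Kobo screen pixels, under the usual 72-points-per-inch convention. A quick worked example (the helper is illustrative, not a calibre API):

def pt_to_px(pt, dpi=168.451):
    # points -> pixels at the profile's resolution
    return pt * dpi / 72.0

base_px = pt_to_px(12)  # the 12 pt fbase is ~28 px on this screen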

View File

@@ -8,7 +8,7 @@ import os, re
 
 from mimetypes import guess_type as guess_mimetype
 from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString
+from calibre.constants import iswindows
 from calibre.utils.chm.chm import CHMFile
 from calibre.utils.chm.chmlib import (
         CHM_RESOLVE_SUCCESS, CHM_ENUMERATE_NORMAL,
@@ -135,10 +135,16 @@ class CHMReader(CHMFile):
             if lpath.find(';') != -1:
                 # fix file names with ";<junk>" at the end, see _reformat()
                 lpath = lpath.split(';')[0]
-            with open(lpath, 'wb') as f:
-                if guess_mimetype(path)[0] == ('text/html'):
-                    data = self._reformat(data)
-                f.write(data)
+            try:
+                with open(lpath, 'wb') as f:
+                    if guess_mimetype(path)[0] == ('text/html'):
+                        data = self._reformat(data)
+                    f.write(data)
+            except:
+                if iswindows and len(lpath) > 250:
+                    self.log.warn('%r filename too long, skipping'%path)
+                    continue
+                raise
         self._extracted = True
         files = os.listdir(output_dir)
         if self.hhc_path not in files:
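
The new try/except works around the Windows path-length limit (MAX_PATH, 260 characters): when a CHM entry extracts to an over-long path, open() fails, and the reader now logs and skips that entry instead of aborting the whole extraction. The guard in isolation, with illustrative names and the commit's bare except narrowed to EnvironmentError:

import os

MAX_SAFE_PATH = 250  # just under the Windows MAX_PATH limit of 260

def write_entry(lpath, data, log, iswindows=(os.name == 'nt')):
    try:
        with open(lpath, 'wb') as f:
            f.write(data)
    except EnvironmentError:
        if iswindows and len(lpath) > MAX_SAFE_PATH:
            log.warn('%r filename too long, skipping' % lpath)
            return False  # the commit uses continue inside its loop
        raise
    return True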

View File

@@ -689,14 +689,28 @@ class DeviceMixin(object): # {{{
             self.device_error_dialog.show()
 
     # Device connected {{{
-    def device_detected(self, connected, is_folder_device):
-        '''
-        Called when a device is connected to the computer.
-        '''
+    def set_device_menu_items_state(self, connected, is_folder_device):
         if connected:
             self._sync_menu.connect_to_folder_action.setEnabled(False)
             if is_folder_device:
                 self._sync_menu.disconnect_from_folder_action.setEnabled(True)
+            self._sync_menu.enable_device_actions(True,
+                    self.device_manager.device.card_prefix(),
+                    self.device_manager.device)
+            self.eject_action.setEnabled(True)
+        else:
+            self._sync_menu.connect_to_folder_action.setEnabled(True)
+            self._sync_menu.disconnect_from_folder_action.setEnabled(False)
+            self._sync_menu.enable_device_actions(False)
+            self.eject_action.setEnabled(False)
+
+    def device_detected(self, connected, is_folder_device):
+        '''
+        Called when a device is connected to the computer.
+        '''
+        self.set_device_menu_items_state(connected, is_folder_device)
+        if connected:
             self.device_manager.get_device_information(\
                     Dispatcher(self.info_read))
             self.set_default_thumbnail(\
@@ -705,17 +719,10 @@ class DeviceMixin(object): # {{{
                     self.device_manager.device.__class__.get_gui_name()+\
                             _(' detected.'), 3000)
             self.device_connected = 'device' if not is_folder_device else 'folder'
-            self._sync_menu.enable_device_actions(True,
-                    self.device_manager.device.card_prefix(),
-                    self.device_manager.device)
             self.location_view.model().device_connected(self.device_manager.device)
-            self.eject_action.setEnabled(True)
             self.refresh_ondevice_info (device_connected = True, reset_only = True)
         else:
-            self._sync_menu.connect_to_folder_action.setEnabled(True)
-            self._sync_menu.disconnect_from_folder_action.setEnabled(False)
             self.device_connected = None
-            self._sync_menu.enable_device_actions(False)
             self.location_view.model().update_devices()
             self.vanity.setText(self.vanity_template%\
                     dict(version=self.latest_version, device=' '))
@@ -723,7 +730,6 @@ class DeviceMixin(object): # {{{
             if self.current_view() != self.library_view:
                 self.book_details.reset_info()
             self.location_view.setCurrentIndex(self.location_view.model().index(0))
-            self.eject_action.setEnabled(False)
             self.refresh_ondevice_info (device_connected = False)
 
     def info_read(self, job):
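
The device.py hunks are an extract-method refactor: the enabling/disabling of the connect-to-folder, device, and eject actions moves out of device_detected into set_device_menu_items_state, so the menu state can be resynchronized outside of a real connect/disconnect event, which is what the ui.py hunk below does after rebuilding the device menu. After the refactor device_detected reduces to roughly this shape (bodies abbreviated):

def device_detected(self, connected, is_folder_device):
    # menu/eject state first, then the heavier connect/disconnect work
    self.set_device_menu_items_state(connected, is_folder_device)
    if connected:
        pass  # fetch device info, set thumbnail, update location view
    else:
        pass  # clear device state and refresh the library view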

View File

@@ -410,6 +410,8 @@ class Main(MainWindow, Ui_MainWindow, DeviceMixin, ToolbarMixin, # {{{
             self.tags_view.set_new_model() # in case columns changed
             self.tags_view.recount()
             self.create_device_menu()
+            self.set_device_menu_items_state(bool(self.device_connected),
+                    self.device_connected == 'folder')
             if not patheq(self.library_path, d.database_location):
                 newloc = d.database_location

View File

@@ -596,10 +596,11 @@ class DNSIncoming(object):
                     next = off + 1
                 off = ((len & 0x3F) << 8) | ord(self.data[off])
                 if off >= first:
-                    raise 'Bad domain name (circular) at ' + str(off)
+                    raise ValueError('Bad domain name (circular) at ' +
+                            str(off))
                 first = off
             else:
-                raise 'Bad domain name at ' + str(off)
+                raise ValueError('Bad domain name at ' + str(off))
         if next >= 0:
             self.offset = next
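
The zeroconf fix replaces 1.x-style string exceptions with ValueError. String exceptions were deprecated in Python 2.5 and removed in 2.6, where raise 'message' itself fails with a TypeError, so the old error text could never surface there. The new form is catchable in the ordinary way; a sketch, where readName is assumed to be the DNSIncoming method containing these raises:

def read_packet_name(incoming):
    # 'incoming' stands in for a DNSIncoming instance
    try:
        return incoming.readName()
    except ValueError, e:
        # with a string exception this was uncatchable on Python 2.6+
        print 'Dropping malformed mDNS packet: %s' % e
        return None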