diff --git a/resources/recipes/wsj_free.recipe b/resources/recipes/wsj_free.recipe
deleted file mode 100644
index e29bfe3dde..0000000000
--- a/resources/recipes/wsj_free.recipe
+++ /dev/null
@@ -1,314 +0,0 @@
-#!/usr/bin/env python
-
-__license__ = 'GPL v3'
-
-'''
-online.wsj.com
-'''
-import re
-from calibre.web.feeds.recipes import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import Tag, NavigableString
-from datetime import timedelta, date
-
-class WSJ(BasicNewsRecipe):
- # formatting adapted from original recipe by Kovid Goyal and Sujata Raman
- title = u'Wall Street Journal (free)'
- __author__ = 'Nick Redding'
- language = 'en'
- description = 'All the free content from the Wall Street Journal (business, financial and political news)'
-
- no_stylesheets = True
- timefmt = ' [%b %d]'
-
- # customization notes: delete any sections from sectionlist below that you are not interested in;
- # set omit_paid_content to False if you want snippets of paid "Subscriber Content" articles;
- # set oldest_article to the maximum number of days back from today to include articles
- # (see the illustrative example after the settings below)
- sectionlist = [
- ['/home-page','Front Page'],
- ['/public/page/news-opinion-commentary.html','Commentary'],
- ['/public/page/news-global-world.html','World News'],
- ['/public/page/news-world-business.html','US News'],
- ['/public/page/news-business-us.html','Business'],
- ['/public/page/news-financial-markets-stock.html','Markets'],
- ['/public/page/news-tech-technology.html','Technology'],
- ['/public/page/news-personal-finance.html','Personal Finance'],
- ['/public/page/news-lifestyle-arts-entertainment.html','Life & Style'],
- ['/public/page/news-real-estate-homes.html','Real Estate'],
- ['/public/page/news-career-jobs.html','Careers'],
- ['/public/page/news-small-business-marketing.html','Small Business']
- ]
- oldest_article = 2
- omit_paid_content = True
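- # illustrative example (hypothetical choices): to fetch only the front page and
- # US news from up to a week back, keeping paid-content snippets out, you might use:
- # sectionlist = [
- #     ['/home-page','Front Page'],
- #     ['/public/page/news-world-business.html','US News'],
- # ]
- # oldest_article = 7
- # omit_paid_content = True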
-
- extra_css = '''h1{font-size:large; font-family:Times,serif;}
- h2{font-family:Times,serif; font-size:small; font-style:italic;}
- .subhead{font-family:Times,serif; font-size:small; font-style:italic;}
- .insettipUnit {font-family:Times,serif;font-size:xx-small;}
- .targetCaption{font-size:x-small; font-family:Times,serif; font-style:italic; margin-top: 0.25em;}
- .article{font-family:Times,serif; font-size:x-small;}
- .tagline { font-size:xx-small;}
- .dateStamp {font-family:Times,serif;}
- h3{font-family:Times,serif; font-size:xx-small;}
- .byline {font-family:Times,serif; font-size:xx-small; list-style-type: none;}
- .metadataType-articleCredits {list-style-type: none;}
- h6{font-family:Times,serif; font-size:small; font-style:italic;}
- .paperLocation{font-size:xx-small;}'''
-
-
- remove_tags_before = dict({'class':re.compile('^articleHeadlineBox')})
- remove_tags = [ dict({'id':re.compile('^articleTabs_tab_')}),
- #dict(id=["articleTabs_tab_article", "articleTabs_tab_comments",
- # "articleTabs_tab_interactive","articleTabs_tab_video",
- # "articleTabs_tab_map","articleTabs_tab_slideshow"]),
- {'class': ['footer_columns','network','insetCol3wide','interactive','video','slideshow','map',
- 'insettip','insetClose','more_in', "insetContent",
- # 'articleTools_bottom','articleTools_bottom mjArticleTools',
- 'aTools', 'tooltip',
- 'adSummary', 'nav-inline','insetFullBracket']},
- dict({'class':re.compile('^articleTools_bottom')}),
- dict(rel='shortcut icon')
- ]
- remove_tags_after = [dict(id="article_story_body"), {'class':"article story"}]
-
- def get_browser(self):
- br = BasicNewsRecipe.get_browser()
- return br
-
-
- def preprocess_html(self,soup):
-
- def decode_us_date(datestr):
- udate = datestr.strip().lower().split()
- m = ['january','february','march','april','may','june','july','august','september','october','november','december'].index(udate[0])+1
- d = int(udate[1])
- y = int(udate[2])
- return date(y,m,d)
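- # e.g. decode_us_date('January 5 2011') -> datetime.date(2011, 1, 5) (illustrative date)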
-
- # check if article is paid content
- if self.omit_paid_content:
- divtags = soup.findAll('div','tooltip')
- if divtags:
- for divtag in divtags:
- if divtag.find(text="Subscriber Content"):
- return None
-
- # check if article is too old
- datetag = soup.find('li',attrs={'class' : re.compile("^dateStamp")})
- if datetag:
- dateline_string = self.tag_to_string(datetag,False)
- date_items = dateline_string.split(',')
- datestring = date_items[0]+date_items[1]
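- # dateline strings are assumed to look like 'JANUARY 5, 2011, 7:45 P.M. ET',
- # so rejoining items [0] and [1] yields 'JANUARY 5 2011' for decode_us_date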
- article_date = decode_us_date(datestring)
- earliest_date = date.today() - timedelta(days=self.oldest_article)
- if article_date < earliest_date:
- self.log("Skipping article dated %s" % datestring)
- return None
- datetag.parent.extract()
-
- # place dateline in article heading
-
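- # three layouts are handled: an h3 byline, an li byline wrapping an h3,
- # and no byline at all (in which case the dateline is appended to the headline box)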
- bylinetag = soup.find('h3','byline')
- if bylinetag:
- h3bylinetag = bylinetag
- else:
- bylinetag = soup.find('li','byline')
- if bylinetag:
- h3bylinetag = bylinetag.h3
- if not h3bylinetag:
- h3bylinetag = bylinetag
- bylinetag = bylinetag.parent
- if bylinetag:
- if h3bylinetag.a:
- bylinetext = 'By '+self.tag_to_string(h3bylinetag.a,False)
- else:
- bylinetext = self.tag_to_string(h3bylinetag,False)
- h3byline = Tag(soup,'h3',[('class','byline')])
- if bylinetext.isspace() or (bylinetext == ''):
- h3byline.insert(0,NavigableString(date_items[0]+','+date_items[1]))
- else:
- h3byline.insert(0,NavigableString(bylinetext+u'\u2014'+date_items[0]+','+date_items[1]))
- bylinetag.replaceWith(h3byline)
- else:
- headlinetag = soup.find('div',attrs={'class' : re.compile("^articleHeadlineBox")})
- if headlinetag:
- dateline = Tag(soup,'h3', [('class','byline')])
- dateline.insert(0,NavigableString(date_items[0]+','+date_items[1]))
- headlinetag.insert(len(headlinetag),dateline)
- else: # if no date tag, don't process this page--it's not a news item
- return None
- # This gets rid of the annoying superfluous bullet symbol preceding columnist bylines
- ultag = soup.find('ul',attrs={'class' : 'cMetadata metadataType-articleCredits'})
- if ultag:
- a = ultag.h3
- if a:
- ultag.replaceWith(a)
- return soup
-
- def parse_index(self):
-
- articles = {}
- key = None
- ans = []
-
- def parse_index_page(page_name,page_title):
-
- def article_title(tag):
- atag = tag.find('h2') # title is usually in an h2 tag
- if not atag: # if not, get text from the a tag
- atag = tag.find('a',href=True)
- if not atag:
- return ''
- t = self.tag_to_string(atag,False)
- if t == '':
- # sometimes the title is in the second a tag
- atag.extract()
- atag = tag.find('a',href=True)
- if not atag:
- return ''
- return self.tag_to_string(atag,False)
- return t
- return self.tag_to_string(atag,False)
-
- def article_author(tag):
- atag = tag.find('strong') # author is usually in a strong tag
- if not atag:
- atag = tag.find('h4') # if not, look for an h4 tag
- if not atag:
- return ''
- return self.tag_to_string(atag,False)
-
- def article_summary(tag):
- atag = tag.find('p')
- if not atag:
- return ''
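- # the leading strong tag (presumably the author credit, as in article_author)
- # is stripped so that only the summary paragraph text remains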
- subtag = atag.strong
- if subtag:
- subtag.extract()
- return self.tag_to_string(atag,False)
-
- def article_url(tag):
- atag = tag.find('a',href=True)
- if not atag:
- return ''
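- # strips any query string: a hypothetical href such as
- # '/article/SB123456.html?mod=rss_whats_news' becomes '/article/SB123456.html'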
- url = re.sub(r'\?.*', '', atag['href'])
- return url
-
- def handle_section_name(tag):
- # turns a tag into a section name with special processing
- # for What's News, U.S., World & U.S., and World
- s = self.tag_to_string(tag,False)
- if ("What" in s) and ("News" in s):
- s = "What's News"
- elif (s == "U.S.") or (s == "World & U.S.") or (s == "World"):
- s = s + " News"
- return s
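- # e.g. "What's News: Business & Finance" (illustrative) -> "What's News";
- # 'U.S.' -> 'U.S. News', 'World' -> 'World News'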
-
-
-
- mainurl = 'http://online.wsj.com'
- pageurl = mainurl+page_name
- #self.log("Page url %s" % pageurl)
- soup = self.index_to_soup(pageurl)
- # Find each instance of div with class including "headlineSummary"
- for divtag in soup.findAll('div',attrs={'class' : re.compile("^headlineSummary")}):
- # divtag contains all article data as ul's and li's
- # first, check if there is an h3 tag which provides a section name
- stag = divtag.find('h3')
- if stag:
- if stag.parent.get('class', '') == 'dynamic':
- # a carousel of articles is too complex to extract a section name
- # for each article, so we'll just call the section "Carousel"
- section_name = 'Carousel'
- else:
- section_name = handle_section_name(stag)
- else:
- section_name = "What's News"
- #self.log("div Section %s" % section_name)
- # find each top-level ul in the div
- # we don't restrict to class = newsItem because the section_name
- # sometimes changes via a ul tag inside the div
- for ultag in divtag.findAll('ul',recursive=False):
- stag = ultag.find('h3')
- if stag:
- if stag.parent.name == 'ul':
- # section name has changed
- section_name = handle_section_name(stag)
- #self.log("ul Section %s" % section_name)
- # delete the h3 tag so it doesn't get in the way
- stag.extract()
- # find each top level li in the ul
- for litag in ultag.findAll('li',recursive=False):
- stag = litag.find('h3')
- if stag:
- # section name has changed
- section_name = handle_section_name(stag)
- #self.log("li Section %s" % section_name)
- # delete the h3 tag so it doesn't get in the way
- stag.extract()
- # if there is a ul tag inside the li it is superfluous;
- # it is probably a list of related articles
- utag = litag.find('ul')
- if utag:
- utag.extract()
- # now skip paid subscriber articles if desired
- subscriber_tag = litag.find(text="Subscriber Content")
- if subscriber_tag:
- if self.omit_paid_content:
- continue
- # delete the tip div so it doesn't get in the way
- tiptag = litag.find("div", { "class" : "tipTargetBox" })
- if tiptag:
- tiptag.extract()
- h1tag = litag.h1
- # if there's an h1 tag, its parent is a div which should replace
- # the li tag for the analysis
- if h1tag:
- litag = h1tag.parent
- h5tag = litag.h5
- if h5tag:
- # section name has changed
- section_name = self.tag_to_string(h5tag,False)
- #self.log("h5 Section %s" % section_name)
- # delete the h5 tag so it doesn't get in the way
- h5tag.extract()
- url = article_url(litag)
- if url == '':
- continue
- if url.startswith("/article"):
- url = mainurl+url
- if not url.startswith("http://online.wsj.com"):
- continue
- if not url.endswith(".html"):
- continue
- if 'video' in url:
- continue
- title = article_title(litag)
- if title == '':
- continue
- #self.log("URL %s" % url)
- #self.log("Title %s" % title)
- pubdate = ''
- #self.log("Date %s" % pubdate)
- author = article_author(litag)
- if author == '':
- author = section_name
- elif author == section_name:
- author = ''
- else:
- author = section_name+': '+author
- #if not author == '':
- # self.log("Author %s" % author)
- description = article_summary(litag)
- #if not description == '':
- # self.log("Description %s" % description)
- if not articles.has_key(page_title):
- articles[page_title] = []
- articles[page_title].append(dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))
-
-
- for page_name,page_title in self.sectionlist:
- parse_index_page(page_name,page_title)
- ans.append(page_title)
-
- ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
- return ans
diff --git a/src/calibre/gui2/convert/single.ui b/src/calibre/gui2/convert/single.ui
index 290b31aaec..5b976a5cfb 100644
--- a/src/calibre/gui2/convert/single.ui
+++ b/src/calibre/gui2/convert/single.ui
@@ -31,7 +31,14 @@
      </widget>
     </item>
     <item>
-     <widget class="QComboBox" name="input_formats"/>
-    </item>
+     <widget class="QComboBox" name="input_formats">
+      <property name="sizeAdjustPolicy">
+       <enum>QComboBox::AdjustToMinimumContentsLengthWithIcon</enum>
+      </property>
+      <property name="minimumContentsLength">
+       <number>5</number>
+      </property>
+     </widget>
+    </item>
    </layout>
   </item>
@@ -64,7 +71,14 @@
      </widget>
     </item>
     <item>
-     <widget class="QComboBox" name="output_formats"/>
-    </item>
+     <widget class="QComboBox" name="output_formats">
+      <property name="sizeAdjustPolicy">
+       <enum>QComboBox::AdjustToMinimumContentsLengthWithIcon</enum>
+      </property>
+      <property name="minimumContentsLength">
+       <number>5</number>
+      </property>
+     </widget>
+    </item>
    </layout>
   </item>
@@ -115,8 +129,8 @@
     <property name="geometry">
      <rect>
       <x>0</x>
       <y>0</y>
-      <width>810</width>
-      <height>489</height>
+      <width>805</width>
+      <height>484</height>
      </rect>
     </property>
diff --git a/src/calibre/libunrar.py b/src/calibre/libunrar.py
index bf38a47d64..a71fd8718a 100644
--- a/src/calibre/libunrar.py
+++ b/src/calibre/libunrar.py
@@ -177,7 +177,7 @@ def extract(path, dir):
try:
if open_archive_data.OpenResult != 0:
raise UnRARException(_interpret_open_error(open_archive_data.OpenResult, path))
- prints('Archive:', path)
+ #prints('Archive:', path)
#print get_archive_info(open_archive_data.Flags)
header_data = RARHeaderDataEx(CmtBuf=None)
#_libunrar.RARSetCallback(arc_data, callback_func, mode)