diff --git a/resources/images/dialog_question.svg b/resources/images/dialog_question.svg new file mode 100644 index 0000000000..be41385b7d --- /dev/null +++ b/resources/images/dialog_question.svg @@ -0,0 +1,269 @@ + + +image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/resources/images/edit_copy.svg b/resources/images/edit_copy.svg new file mode 100644 index 0000000000..86e1adbc3f --- /dev/null +++ b/resources/images/edit_copy.svg @@ -0,0 +1,4298 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/resources/images/edit_input.svg b/resources/images/edit_input.svg index 90d61917ee..54200503e2 100644 --- a/resources/images/edit_input.svg +++ b/resources/images/edit_input.svg @@ -13,23 +13,23 @@ id="Layer_1" x="0px" y="0px" - width="207.38489" - height="201.668" - viewBox="0 0 207.38488 201.668" - enable-background="new 0 0 595.28 841.89" + width="249.46899" + height="257.47101" + viewBox="0 0 249.469 257.471" + enable-background="new 0 0 595.279 841.89" xml:space="preserve" inkscape:version="0.47 r22583" sodipodi:docname="edit_input.svg">image/svg+xml - - - - + - + - - + + - \ No newline at end of file diff --git a/resources/images/news/evz.ro.png b/resources/images/news/evz.ro.png new file mode 100644 index 0000000000..c0549185bc Binary files /dev/null and b/resources/images/news/evz.ro.png differ diff --git a/resources/images/news/haaretz.png b/resources/images/news/haaretz.png new file mode 100644 index 0000000000..e6cb02c3b6 Binary files /dev/null and b/resources/images/news/haaretz.png differ diff --git a/resources/images/sync.svg b/resources/images/sync.svg index 09e59a1461..5055d530ea 100644 --- a/resources/images/sync.svg +++ b/resources/images/sync.svg @@ -13,10 +13,10 @@ id="Layer_1" x="0px" y="0px" - width="148.905" - height="201.16701" - viewBox="0 0 148.90499 201.167" - enable-background="new 0 0 595.28 841.89" + width="148.90399" + height="202.65399" + viewBox="0 0 148.90399 202.65399" + enable-background="new 0 0 595.279 841.89" xml:space="preserve" inkscape:version="0.47 r22583" sodipodi:docname="sync.svg"> + x1="297.64529" + y1="525.29828" + x2="297.64529" + y2="327.84811" + gradientTransform="matrix(1,0,0,-1,-223.1929,526.13229)"> \ No newline at end of file diff --git a/resources/recipes/anchorage_daily.recipe b/resources/recipes/anchorage_daily.recipe new file mode 100644 index 0000000000..4ce2f13a14 --- /dev/null +++ b/resources/recipes/anchorage_daily.recipe @@ -0,0 +1,40 @@ +from calibre.web.feeds.news import BasicNewsRecipe + +class AdvancedUserRecipe1278347258(BasicNewsRecipe): + title = u'Anchorage Daily News' + __author__ = 'rty' + oldest_article = 7 + max_articles_per_feed = 100 + + feeds = [(u'Alaska News', u'http://www.adn.com/news/alaska/index.xml'), + (u'Business', u'http://www.adn.com/money/index.xml'), + (u'Sports', u'http://www.adn.com/sports/index.xml'), + (u'Politics', u'http://www.adn.com/politics/index.xml'), + (u'Lifestyles', u'http://www.adn.com/life/index.xml'), + (u'Iditarod', u'http://www.adn.com/iditarod/index.xml') + ] + description = ''''Alaska's Newspaper''' + publisher = 'http://www.adn.com' + category = 'news, Alaska, Anchorage' + language = 'en' + extra_css = ''' + p{font-weight: normal;text-align: justify} + ''' + remove_javascript = True + use_embedded_content = False + no_stylesheets = True + language = 'en' + encoding = 'latin-1' + conversion_options = {'linearize_tables':True} + masthead_url = 'http://media.adn.com/includes/assets/images/adn_logo.2.gif' + + keep_only_tags = [ + dict(name='div', attrs={'class':'left_col story_mainbar'}), + ] + remove_tags = [ + dict(name='div', attrs={'class':'story_tools'}), + dict(name='p', attrs={'class':'ad_label'}), + ] + remove_tags_after = [ + dict(name='div', attrs={'class':'advertisement'}), + ] diff --git a/resources/recipes/evz.ro.recipe b/resources/recipes/evz.ro.recipe new file mode 100644 index 0000000000..bce151d1fc --- /dev/null +++ b/resources/recipes/evz.ro.recipe @@ -0,0 +1,52 @@ +__license__ = 'GPL v3' +__copyright__ = '2010, Darko Miletic ' +''' +evz.ro +''' + +import re +from calibre.web.feeds.news import BasicNewsRecipe + +class EVZ_Ro(BasicNewsRecipe): + title = 'evz.ro' + __author__ = 'Darko Miletic' + description = 'News from Romania' + publisher = 'evz.ro' + category = 'news, politics, Romania' + oldest_article = 2 + max_articles_per_feed = 200 + no_stylesheets = True + encoding = 'utf8' + use_embedded_content = False + language = 'ro' + masthead_url = 'http://www.evz.ro/fileadmin/images/logo.gif' + extra_css = ' body{font-family: Georgia,Arial,Helvetica,sans-serif } .firstP{font-size: 1.125em} .author,.articleInfo{font-size: small} ' + + conversion_options = { + 'comment' : description + , 'tags' : category + , 'publisher' : publisher + , 'language' : language + } + + preprocess_regexps = [ + (re.compile(r'.*?', re.DOTALL|re.IGNORECASE),lambda match: '<head><title>') + ,(re.compile(r'.*?', re.DOTALL|re.IGNORECASE),lambda match: '') + ] + + remove_tags = [ + dict(name=['form','embed','iframe','object','base','link','script','noscript']) + ,dict(attrs={'class':['section','statsInfo','email il']}) + ,dict(attrs={'id' :'gallery'}) + ] + + remove_tags_after = dict(attrs={'class':'section'}) + keep_only_tags = [dict(attrs={'class':'single'})] + remove_attributes = ['height','width'] + + feeds = [(u'Articles', u'http://www.evz.ro/rss.xml')] + + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + return soup diff --git a/resources/recipes/haaretz_en.recipe b/resources/recipes/haaretz_en.recipe index 4df6b45a3e..4404624aff 100644 --- a/resources/recipes/haaretz_en.recipe +++ b/resources/recipes/haaretz_en.recipe @@ -1,56 +1,95 @@ __license__ = 'GPL v3' __copyright__ = '2010, Darko Miletic ' ''' -haaretz.com +www.haaretz.com ''' +import re +from calibre import strftime +from time import gmtime from calibre.web.feeds.news import BasicNewsRecipe -class Haaretz_en(BasicNewsRecipe): - title = 'Haaretz in English' +class HaaretzPrint_en(BasicNewsRecipe): + title = 'Haaretz - print edition' __author__ = 'Darko Miletic' - description = 'Haaretz.com, the online edition of Haaretz Newspaper in Israel, and analysis from Israel and the Middle East. Haaretz.com provides extensive and in-depth coverage of Israel, the Jewish World and the Middle East, including defense, diplomacy, the Arab-Israeli conflict, the peace process, Israeli politics, Jerusalem affairs, international relations, Iran, Iraq, Syria, Lebanon, the Palestinian Authority, the West Bank and the Gaza Strip, the Israeli business world and Jewish life in Israel and the Diaspora. ' - publisher = 'haaretz.com' - category = 'news, politics, Israel' + description = "Haaretz.com is the world's leading English-language Website for real-time news and analysis of Israel and the Middle East." + publisher = 'Haaretz' + category = "news, Haaretz, Israel news, Israel newspapers, Israel business news, Israel financial news, Israeli news,Israeli newspaper, Israeli newspapers, news from Israel, news in Israel, news Israel, news on Israel, newspaper Israel, Israel sports news, Israel diplomacy news" oldest_article = 2 max_articles_per_feed = 200 no_stylesheets = True - encoding = 'cp1252' + encoding = 'utf8' use_embedded_content = False language = 'en_IL' publication_type = 'newspaper' - remove_empty_feeds = True - masthead_url = 'http://www.haaretz.com/images/logos/logoGrey.gif' + PREFIX = 'http://www.haaretz.com' + masthead_url = PREFIX + '/images/logos/logoGrey.gif' extra_css = ' body{font-family: Verdana,Arial,Helvetica,sans-serif } ' + preprocess_regexps = [(re.compile(r'.*?', re.DOTALL|re.IGNORECASE),lambda match: '')] + conversion_options = { - 'comment' : description - , 'tags' : category - , 'publisher' : publisher - , 'language' : language + 'comment' : description + , 'tags' : category + , 'publisher': publisher + , 'language' : language } - remove_tags = [dict(name='div', attrs={'class':['rightcol']}),dict(name='table')] - remove_tags_before = dict(name='h1') - remove_tags_after = dict(attrs={'id':'innerArticle'}) - keep_only_tags = [dict(attrs={'id':'content'})] + keep_only_tags = [dict(attrs={'id':'threecolumns'})] + remove_attributes = ['width','height'] + remove_tags = [ + dict(name=['iframe','link','object','embed']) + ,dict(name='div',attrs={'class':'rightcol'}) + ] feeds = [ - (u'Opinion' , u'http://www.haaretz.com/cmlink/opinion-rss-1.209234?localLinksEnabled=false' ) - ,(u'Defense and diplomacy' , u'http://www.haaretz.com/cmlink/defense-and-diplomacy-rss-1.208894?localLinksEnabled=false') - ,(u'National' , u'http://www.haaretz.com/cmlink/national-rss-1.208896?localLinksEnabled=false' ) - ,(u'International' , u'http://www.haaretz.com/cmlink/international-rss-1.208898?localLinksEnabled=false' ) - ,(u'Jewish World' , u'http://www.haaretz.com/cmlink/jewish-world-rss-1.209085?localLinksEnabled=false' ) - ,(u'Business' , u'http://www.haaretz.com/cmlink/business-print-rss-1.264904?localLinksEnabled=false' ) - ,(u'Real Estate' , u'http://www.haaretz.com/cmlink/real-estate-print-rss-1.264977?localLinksEnabled=false' ) - ,(u'Features' , u'http://www.haaretz.com/cmlink/features-print-rss-1.264912?localLinksEnabled=false' ) - ,(u'Arts and leisure' , u'http://www.haaretz.com/cmlink/arts-and-leisure-rss-1.286090?localLinksEnabled=false' ) - ,(u'Books' , u'http://www.haaretz.com/cmlink/books-rss-1.264947?localLinksEnabled=false' ) - ,(u'Food and Wine' , u'http://www.haaretz.com/cmlink/food-and-wine-print-rss-1.265034?localLinksEnabled=false' ) - ,(u'Sports' , u'http://www.haaretz.com/cmlink/sports-rss-1.286092?localLinksEnabled=false' ) + (u'News' , PREFIX + u'/print-edition/news' ) + ,(u'Opinion' , PREFIX + u'/print-edition/opinion' ) + ,(u'Business' , PREFIX + u'/print-edition/business' ) + ,(u'Real estate' , PREFIX + u'/print-edition/real-estate' ) + ,(u'Sports' , PREFIX + u'/print-edition/sports' ) + ,(u'Travel' , PREFIX + u'/print-edition/travel' ) + ,(u'Books' , PREFIX + u'/print-edition/books' ) + ,(u'Food & Wine' , PREFIX + u'/print-edition/food-wine' ) + ,(u'Arts & Leisure', PREFIX + u'/print-edition/arts-leisure' ) + ,(u'Features' , PREFIX + u'/print-edition/features' ) ] + + def print_version(self, url): + article = url.rpartition('/')[2] + return 'http://www.haaretz.com/misc/article-print-page/' + article + + def parse_index(self): + totalfeeds = [] + lfeeds = self.get_feeds() + for feedobj in lfeeds: + feedtitle, feedurl = feedobj + self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl)) + articles = [] + soup = self.index_to_soup(feedurl) + for item in soup.findAll(attrs={'class':'text'}): + sp = item.find('span',attrs={'class':'h3 font-weight-normal'}) + desc = item.find('p') + description = '' + if sp: + if desc: + description = self.tag_to_string(desc) + link = sp.a + url = self.PREFIX + link['href'] + title = self.tag_to_string(link) + times = strftime('%a, %d %b %Y %H:%M:%S +0000',gmtime()) + articles.append({ + 'title' :title + ,'date' :times + ,'url' :url + ,'description':description + }) + totalfeeds.append((feedtitle, articles)) + return totalfeeds + + def preprocess_html(self, soup): for item in soup.findAll(style=True): del item['style'] diff --git a/resources/recipes/nytimes_sub.recipe b/resources/recipes/nytimes_sub.recipe index f4101ca299..1814132667 100644 --- a/resources/recipes/nytimes_sub.recipe +++ b/resources/recipes/nytimes_sub.recipe @@ -336,7 +336,7 @@ class NYTimes(BasicNewsRecipe): self.log(">>> No class:'columnGroup first' found <<<") # Change class="kicker" to

kicker = soup.find(True, {'class':'kicker'}) - if kicker and kicker.contents[0]: + if kicker and kicker.contents and kicker.contents[0]: h3Tag = Tag(soup, "h3") h3Tag.insert(0, self.fixChars(self.tag_to_string(kicker, use_alt=False))) @@ -460,8 +460,10 @@ class NYTimes(BasicNewsRecipe): return self.massageNCXText(self.tag_to_string(p,use_alt=False)) return None - article.author = extract_author(soup) - article.summary = article.text_summary = extract_description(soup) + if not article.author: + article.author = extract_author(soup) + if not article.summary: + article.summary = article.text_summary = extract_description(soup) def strip_anchors(self,soup): paras = soup.findAll(True) diff --git a/src/calibre/gui2/__init__.py b/src/calibre/gui2/__init__.py index 9face1d9e9..d922af0914 100644 --- a/src/calibre/gui2/__init__.py +++ b/src/calibre/gui2/__init__.py @@ -226,10 +226,11 @@ def error_dialog(parent, title, msg, det_msg='', show=False, return d.exec_() return d -def question_dialog(parent, title, msg, det_msg='', show_copy_button=True): - d = MessageBox(QMessageBox.Question, title, msg, QMessageBox.Yes|QMessageBox.No, +def question_dialog(parent, title, msg, det_msg='', show_copy_button=True, + buttons=QMessageBox.Yes|QMessageBox.No): + d = MessageBox(QMessageBox.Question, title, msg, buttons, parent, det_msg) - d.setIconPixmap(QPixmap(I('dialog_information.svg'))) + d.setIconPixmap(QPixmap(I('dialog_question.svg'))) d.setEscapeButton(QMessageBox.No) if not show_copy_button: d.cb.setVisible(False) @@ -592,8 +593,11 @@ def open_url(qurl): def open_local_file(path): - url = QUrl.fromLocalFile(path) - open_url(url) + if iswindows: + os.startfile(os.path.normpath(path)) + else: + url = QUrl.fromLocalFile(path) + open_url(url) def is_ok_to_use_qt(): global gui_thread, _store_app diff --git a/src/calibre/gui2/device.py b/src/calibre/gui2/device.py index 5eee4a3478..91afac8aa2 100644 --- a/src/calibre/gui2/device.py +++ b/src/calibre/gui2/device.py @@ -10,7 +10,7 @@ from functools import partial from binascii import unhexlify from PyQt4.Qt import QMenu, QAction, QActionGroup, QIcon, SIGNAL, QPixmap, \ - Qt, pyqtSignal, QColor, QPainter, QDialog + Qt, pyqtSignal, QColor, QPainter, QDialog, QMessageBox from PyQt4.QtSvg import QSvgRenderer from calibre.customize.ui import available_input_formats, available_output_formats, \ @@ -953,7 +953,8 @@ class DeviceMixin(object): # {{{ autos = '\n'.join('%s'%i for i in autos) if question_dialog(self, _('No suitable formats'), _('Auto convert the following books before sending via ' - 'email?'), det_msg=autos): + 'email?'), det_msg=autos, + buttons=QMessageBox.Yes|QMessageBox.Cancel): self.auto_convert_mail(to, fmts, delete_from_library, auto, format) if bad: @@ -1052,7 +1053,8 @@ class DeviceMixin(object): # {{{ autos = '\n'.join('%s'%i for i in autos) if question_dialog(self, _('No suitable formats'), _('Auto convert the following books before uploading to ' - 'the device?'), det_msg=autos): + 'the device?'), det_msg=autos, + buttons=QMessageBox.Yes|QMessageBox.Cancel): self.auto_convert_catalogs(auto, format) files = [f for f in files if f is not None] if not files: @@ -1113,7 +1115,8 @@ class DeviceMixin(object): # {{{ autos = '\n'.join('%s'%i for i in autos) if question_dialog(self, _('No suitable formats'), _('Auto convert the following books before uploading to ' - 'the device?'), det_msg=autos): + 'the device?'), det_msg=autos, + buttons=QMessageBox.Yes|QMessageBox.Cancel): self.auto_convert_news(auto, format) files = [f for f in files if f is not None] for f in files: @@ -1231,7 +1234,8 @@ class DeviceMixin(object): # {{{ autos = '\n'.join('%s'%i for i in autos) if question_dialog(self, _('No suitable formats'), _('Auto convert the following books before uploading to ' - 'the device?'), det_msg=autos): + 'the device?'), det_msg=autos, + buttons=QMessageBox.Yes|QMessageBox.Cancel): self.auto_convert(auto, on_card, format) if bad: diff --git a/src/calibre/gui2/ui.py b/src/calibre/gui2/ui.py index 709f91da25..80a7e6e004 100644 --- a/src/calibre/gui2/ui.py +++ b/src/calibre/gui2/ui.py @@ -546,7 +546,8 @@ class Main(MainWindow, Ui_MainWindow, DeviceMixin, ToolbarMixin, # {{{ ''' MSG = _('is the result of the efforts of many volunteers from all ' 'over the world. If you find it useful, please consider ' - 'donating to support its development.') + 'donating to support its development. Your donation helps ' + 'keep calibre development going.') HTML = u''' diff --git a/src/calibre/gui2/viewer/main.ui b/src/calibre/gui2/viewer/main.ui index 9177f2713f..78d0d3b2da 100644 --- a/src/calibre/gui2/viewer/main.ui +++ b/src/calibre/gui2/viewer/main.ui @@ -243,7 +243,7 @@ - :/images/convert.svg:/images/convert.svg + :/images/edit_copy.svg:/images/edit_copy.svg Copy to clipboard diff --git a/src/calibre/manual/conversion.rst b/src/calibre/manual/conversion.rst index cd8abd0493..7df11575de 100644 --- a/src/calibre/manual/conversion.rst +++ b/src/calibre/manual/conversion.rst @@ -504,7 +504,7 @@ Meaning, it is very difficult to determine where one paragraph ends and another paragraphs using a configurable, :guilabel:`Line Un-Wrapping Factor`. This is a scale used to determine the length at which a line should be unwrapped. Valid values are a decimal between 0 and 1. The default is 0.5, this is the median line length. Lower this value to include more -text in the unwrapping. Increase to include less. +text in the unwrapping. Increase to include less. You can adjust this value in the conversion settings under PDF Input. Also, they often have headers and footers as part of the document that will become included with the text. Use the options to remove headers and footers to mitigate this issue. If the headers and footers are not diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py index b1af210011..a5478f96a9 100644 --- a/src/calibre/web/feeds/news.py +++ b/src/calibre/web/feeds/news.py @@ -683,13 +683,15 @@ class BasicNewsRecipe(Recipe): base.extract() ans = self.postprocess_html(soup, first_fetch) - try: - article = self.feed_objects[f].articles[a] - except: - self.log.exception('Failed to get article object for postprocessing') - pass - else: - self.populate_article_metadata(article, ans, first_fetch) + if job_info: + url, f, a, feed_len = job_info + try: + article = self.feed_objects[f].articles[a] + except: + self.log.exception('Failed to get article object for postprocessing') + pass + else: + self.populate_article_metadata(article, ans, first_fetch) return ans diff --git a/src/calibre/web/feeds/recipes/collection.py b/src/calibre/web/feeds/recipes/collection.py index 9baebf9900..1dd19dc524 100644 --- a/src/calibre/web/feeds/recipes/collection.py +++ b/src/calibre/web/feeds/recipes/collection.py @@ -22,7 +22,7 @@ E = ElementMaker(namespace=NS, nsmap={None:NS}) def iterate_over_builtin_recipe_files(): exclude = ['craigslist', 'iht', 'outlook_india', 'toronto_sun', - 'indian_express', 'india_today'] + 'indian_express', 'india_today', 'livemint'] d = os.path.dirname base = os.path.join(d(d(d(d(d(d(os.path.abspath(__file__))))))), 'resources', 'recipes') for x in os.walk(base):