From db5f63bcac8a0112022e4bafb54939804b1e1f1e Mon Sep 17 00:00:00 2001 From: bulislaw Date: Sat, 22 Jan 2011 17:44:09 +0000 Subject: [PATCH 01/52] Persistent Search & Replace query UI proposal --- src/calibre/gui2/dialogs/metadata_bulk.ui | 135 ++++++++++++++++++---- 1 file changed, 113 insertions(+), 22 deletions(-) diff --git a/src/calibre/gui2/dialogs/metadata_bulk.ui b/src/calibre/gui2/dialogs/metadata_bulk.ui index f8ae926be6..a1e1d8c550 100644 --- a/src/calibre/gui2/dialogs/metadata_bulk.ui +++ b/src/calibre/gui2/dialogs/metadata_bulk.ui @@ -44,8 +44,8 @@ 0 0 - 842 - 589 + 832 + 574 @@ -55,7 +55,7 @@ - 0 + 2 @@ -574,7 +574,7 @@ Future conversion of these books will use the default settings. QLayout::SetMinimumSize - + true @@ -591,7 +591,7 @@ Future conversion of these books will use the default settings. - + Search &field: @@ -601,14 +601,14 @@ Future conversion of these books will use the default settings. - + The name of the field that you want to search - + @@ -642,7 +642,7 @@ Future conversion of these books will use the default settings. - + Te&mplate: @@ -652,7 +652,7 @@ Future conversion of these books will use the default settings. - + @@ -665,7 +665,7 @@ Future conversion of these books will use the default settings. - + &Search for: @@ -675,7 +675,7 @@ Future conversion of these books will use the default settings. - + @@ -688,7 +688,7 @@ Future conversion of these books will use the default settings. - + Check this box if the search string must match exactly upper and lower case. Uncheck it if case is to be ignored @@ -701,7 +701,7 @@ Future conversion of these books will use the default settings. - + &Replace with: @@ -711,14 +711,14 @@ Future conversion of these books will use the default settings. - + The replacement text. The matched search text will be replaced with this string - + @@ -753,7 +753,7 @@ field is processed. In regular expression mode, only the matched text is process - + &Destination field: @@ -763,7 +763,7 @@ field is processed. In regular expression mode, only the matched text is process - + The field that the text will be put into after all replacements. @@ -771,7 +771,7 @@ If blank, the source field is used if the field is modifiable - + @@ -820,7 +820,7 @@ not multiple and the destination field is multiple - + @@ -906,7 +906,7 @@ not multiple and the destination field is multiple - + QFrame::NoFrame @@ -919,8 +919,8 @@ not multiple and the destination field is multiple 0 0 - 197 - 60 + 810 + 264 @@ -968,6 +968,77 @@ not multiple and the destination field is multiple + + + + Load query: + + + search_field + + + + + + + The name of the field that you want to search + + + + + + + Qt::Horizontal + + + + + + + + + Qt::Horizontal + + + QSizePolicy::Fixed + + + + 20 + 20 + + + + + + + + Save + + + + + + + Remove + + + + + + + Qt::Horizontal + + + + 40 + 20 + + + + + + @@ -1030,6 +1101,9 @@ not multiple and the destination field is multiple series_numbering_restarts series_start_number button_box + query_field + save_button + remove_button search_field search_mode s_r_template @@ -1045,6 +1119,23 @@ not multiple and the destination field is multiple multiple_separator test_text test_result + scrollArea + central_widget + swap_title_and_author + clear_series + adddate + clear_adddate_button + apply_adddate + pubdate + clear_pubdate_button + apply_pubdate + remove_format + change_title_to_title_case + remove_conversion_settings + cover_generate + cover_remove + cover_from_fmt + scrollArea11 From e42664da72abf03af7c86d6b8ded585389e61b65 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 22 Jan 2011 11:08:13 -0700 Subject: [PATCH 02/52] Roger Ebert by Shane Erstad --- resources/recipes/roger_ebert.recipe | 120 +++++++++++++++++++++++++++ 1 file changed, 120 insertions(+) create mode 100644 resources/recipes/roger_ebert.recipe diff --git a/resources/recipes/roger_ebert.recipe b/resources/recipes/roger_ebert.recipe new file mode 100644 index 0000000000..2ea5b52a45 --- /dev/null +++ b/resources/recipes/roger_ebert.recipe @@ -0,0 +1,120 @@ +import re +import urllib2 +from calibre.web.feeds.news import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import BeautifulSoup, SoupStrainer + +class Ebert(BasicNewsRecipe): + title = 'Roger Ebert' + __author__ = 'Shane Erstad' + description = 'Roger Ebert Movie Reviews' + publisher = 'Chicago Sun Times' + category = 'movies' + oldest_article = 8 + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + encoding = 'utf-8' + masthead_url = 'http://rogerebert.suntimes.com/graphics/global/roger.jpg' + language = 'en' + remove_empty_feeds = False + PREFIX = 'http://rogerebert.suntimes.com' + patternReviews = r'(.*?).*?
(.*?)
(.*?)' + patternCommentary = r'
.*?(.*?).*?
(.*?)
' + patternPeople = r'
.*?(.*?).*?
(.*?)
' + patternGlossary = r'
.*?(.*?).*?
(.*?)
' + + + + conversion_options = { + 'comment' : description + , 'tags' : category + , 'publisher' : publisher + , 'language' : language + , 'linearize_tables' : True + } + + + feeds = [ + (u'Reviews' , u'http://rogerebert.suntimes.com/apps/pbcs.dll/section?category=reviews' ) + ,(u'Commentary' , u'http://rogerebert.suntimes.com/apps/pbcs.dll/section?category=COMMENTARY') + ,(u'Great Movies' , u'http://rogerebert.suntimes.com/apps/pbcs.dll/section?category=REVIEWS08') + ,(u'People' , u'http://rogerebert.suntimes.com/apps/pbcs.dll/section?category=PEOPLE') + ,(u'Glossary' , u'http://rogerebert.suntimes.com/apps/pbcs.dll/section?category=GLOSSARY') + + ] + + preprocess_regexps = [ + (re.compile(r'.*?This is a printer friendly.*?.*?
', re.DOTALL|re.IGNORECASE), + lambda m: '') + ] + + + + def print_version(self, url): + return url + '&template=printart' + + def parse_index(self): + totalfeeds = [] + lfeeds = self.get_feeds() + for feedobj in lfeeds: + feedtitle, feedurl = feedobj + self.log('\tFeedurl: ', feedurl) + self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl)) + articles = [] + page = urllib2.urlopen(feedurl).read() + + if feedtitle == 'Reviews' or feedtitle == 'Great Movies': + pattern = self.patternReviews + elif feedtitle == 'Commentary': + pattern = self.patternCommentary + elif feedtitle == 'People': + pattern = self.patternPeople + elif feedtitle == 'Glossary': + pattern = self.patternGlossary + + + regex = re.compile(pattern, re.IGNORECASE|re.DOTALL) + + for match in regex.finditer(page): + if feedtitle == 'Reviews' or feedtitle == 'Great Movies': + movietitle = match.group(1) + thislink = match.group(2) + description = match.group(3) + elif feedtitle == 'Commentary' or feedtitle == 'People' or feedtitle == 'Glossary': + thislink = match.group(1) + description = match.group(2) + + self.log(thislink) + + for link in BeautifulSoup(thislink, parseOnlyThese=SoupStrainer('a')): + thisurl = self.PREFIX + link['href'] + thislinktext = self.tag_to_string(link) + + if feedtitle == 'Reviews' or feedtitle == 'Great Movies': + thistitle = movietitle + elif feedtitle == 'Commentary' or feedtitle == 'People' or feedtitle == 'Glossary': + thistitle = thislinktext + + if thistitle == '': + thistitle = 'Ebert Journal Post' + + """ + pattern2 = r'AID=\/(.*?)\/' + reg2 = re.compile(pattern2, re.IGNORECASE|re.DOTALL) + match2 = reg2.search(thisurl) + date = match2.group(1) + c = time.strptime(match2.group(1),"%Y%m%d") + date=time.strftime("%a, %b %d, %Y", c) + self.log(date) + """ + + articles.append({ + 'title' :thistitle + ,'date' :'' + ,'url' :thisurl + ,'description':description + }) + totalfeeds.append((feedtitle, articles)) + + return totalfeeds + From 57883c120e95c72c7b47e82524ec1d468cdff76b Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 22 Jan 2011 11:28:47 -0700 Subject: [PATCH 03/52] Fix regression in converting HTML files that have non ASCII characters inside their