diff --git a/Changelog.yaml b/Changelog.yaml index d1119e1bbb..b0a9bd68d4 100644 --- a/Changelog.yaml +++ b/Changelog.yaml @@ -35,7 +35,7 @@ - title: "Add an option under Preferences->Look & Feel->Book Details to hide the cover in the book details panel" - - title: "The Calibre Companion Android app that allows wireless connection of Android device to calibre is out of beta. See https://play.google.com/stor/apps/details?id=com.multipie.calibreandroid" + - title: "The Calibre Companion Android app that allows wireless connection of Android device to calibre is out of beta. See https://play.google.com/store/apps/details?id=com.multipie.calibreandroid" bug fixes: - title: "Fix sorting by author not working in the device view in calibre when connected to iTunes" diff --git a/recipes/arcamax.recipe b/recipes/arcamax.recipe index 0f144466d7..924f5ad088 100644 --- a/recipes/arcamax.recipe +++ b/recipes/arcamax.recipe @@ -43,38 +43,38 @@ class Arcamax(BasicNewsRecipe): feeds = [] for title, url in [ ######## COMICS - GENERAL ######## - #(u"9 Chickweed Lane", u"http://www.arcamax.com/ninechickweedlane"), - #(u"Agnes", u"http://www.arcamax.com/agnes"), - #(u"Andy Capp", u"http://www.arcamax.com/andycapp"), + #(u"9 Chickweed Lane", #u"http://www.arcamax.com/thefunnies/ninechickweedlane"), + #(u"Agnes", u"http://www.arcamax.com/thefunnies/agnes"), + #(u"Andy Capp", #u"http://www.arcamax.com/thefunnies/andycapp"), (u"BC", u"http://www.arcamax.com/thefunnies/bc"), - #(u"Baby Blues", u"http://www.arcamax.com/babyblues"), - #(u"Beetle Bailey", u"http://www.arcamax.com/beetlebailey"), + #(u"Baby Blues", #u"http://www.arcamax.com/thefunnies/babyblues"), + #(u"Beetle Bailey", #u"http://www.arcamax.com/thefunnies/beetlebailey"), (u"Blondie", u"http://www.arcamax.com/thefunnies/blondie"), - #u"Boondocks", u"http://www.arcamax.com/boondocks"), - #(u"Cathy", u"http://www.arcamax.com/cathy"), - #(u"Daddys Home", u"http://www.arcamax.com/daddyshome"), + #u"Boondocks", u"http://www.arcamax.com/thefunnies/boondocks"), + #(u"Cathy", u"http://www.arcamax.com/thefunnies/cathy"), + #(u"Daddys Home", #u"http://www.arcamax.com/thefunnies/daddyshome"), (u"Dilbert", u"http://www.arcamax.com/thefunnies/dilbert"), - #(u"Dinette Set", u"http://www.arcamax.com/thedinetteset"), + #(u"Dinette Set", #u"http://www.arcamax.com/thefunnies/thedinetteset"), (u"Dog Eat Doug", u"http://www.arcamax.com/thefunnies/dogeatdoug"), (u"Doonesbury", u"http://www.arcamax.com/thefunnies/doonesbury"), - #(u"Dustin", u"http://www.arcamax.com/dustin"), + #(u"Dustin", u"http://www.arcamax.com/thefunnies/dustin"), (u"Family Circus", u"http://www.arcamax.com/thefunnies/familycircus"), (u"Garfield", u"http://www.arcamax.com/thefunnies/garfield"), - #(u"Get Fuzzy", u"http://www.arcamax.com/getfuzzy"), - #(u"Girls and Sports", u"http://www.arcamax.com/girlsandsports"), - #(u"Hagar the Horrible", u"http://www.arcamax.com/hagarthehorrible"), - #(u"Heathcliff", u"http://www.arcamax.com/heathcliff"), - #(u"Jerry King Cartoons", u"http://www.arcamax.com/humorcartoon"), - #(u"Luann", u"http://www.arcamax.com/luann"), - #(u"Momma", u"http://www.arcamax.com/momma"), - #(u"Mother Goose and Grimm", u"http://www.arcamax.com/mothergooseandgrimm"), + #(u"Get Fuzzy", #u"http://www.arcamax.com/thefunnies/getfuzzy"), + #(u"Girls and Sports", #u"http://www.arcamax.com/thefunnies/girlsandsports"), + #(u"Hagar the Horrible", #u"http://www.arcamax.com/thefunnies/hagarthehorrible"), + #(u"Heathcliff", #u"http://www.arcamax.com/thefunnies/heathcliff"), + #(u"Jerry King Cartoons", #u"http://www.arcamax.com/thefunnies/humorcartoon"), + #(u"Luann", u"http://www.arcamax.com/thefunnies/luann"), + #(u"Momma", u"http://www.arcamax.com/thefunnies/momma"), + #(u"Mother Goose and Grimm", #u"http://www.arcamax.com/thefunnies/mothergooseandgrimm"), (u"Mutts", u"http://www.arcamax.com/thefunnies/mutts"), - #(u"Non Sequitur", u"http://www.arcamax.com/nonsequitur"), - #(u"Pearls Before Swine", u"http://www.arcamax.com/pearlsbeforeswine"), - #(u"Pickles", u"http://www.arcamax.com/pickles"), - #(u"Red and Rover", u"http://www.arcamax.com/redandrover"), - #(u"Rubes", u"http://www.arcamax.com/rubes"), - #(u"Rugrats", u"http://www.arcamax.com/rugrats"), + #(u"Non Sequitur", #u"http://www.arcamax.com/thefunnies/nonsequitur"), + #(u"Pearls Before Swine", #u"http://www.arcamax.com/thefunnies/pearlsbeforeswine"), + #(u"Pickles", u"http://www.arcamax.com/thefunnies/pickles"), + #(u"Red and Rover", #u"http://www.arcamax.com/thefunnies/redandrover"), + #(u"Rubes", u"http://www.arcamax.com/thefunnies/rubes"), + #(u"Rugrats", u"http://www.arcamax.com/thefunnies/rugrats"), (u"Speed Bump", u"http://www.arcamax.com/thefunnies/speedbump"), (u"Wizard of Id", u"http://www.arcamax.com/thefunnies/wizardofid"), (u"Zits", u"http://www.arcamax.com/thefunnies/zits"), diff --git a/recipes/birmingham_post.recipe b/recipes/birmingham_post.recipe index ae5d2c9ce9..b9b3c3fc57 100644 --- a/recipes/birmingham_post.recipe +++ b/recipes/birmingham_post.recipe @@ -1,14 +1,17 @@ from calibre.web.feeds.news import BasicNewsRecipe class AdvancedUserRecipe1306097511(BasicNewsRecipe): title = u'Birmingham post' - description = 'News for Birmingham UK' - timefmt = '' + description = 'Author D.Asbury. News for Birmingham UK' + #timefmt = '' + # last update 8/9/12 __author__ = 'Dave Asbury' - cover_url = 'http://1.bp.blogspot.com/_GwWyq5eGw9M/S9BHPHxW55I/AAAAAAAAB6Q/iGCWl0egGzg/s320/Birmingham+post+Lite+front.JPG' + cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/161987_9010212100_2035706408_n.jpg' oldest_article = 2 max_articles_per_feed = 12 + linearize_tables = True remove_empty_feeds = True remove_javascript = True + no_stylesheets = True #auto_cleanup = True language = 'en_GB' @@ -17,11 +20,12 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe): keep_only_tags = [ - dict(name='h1',attrs={'id' : 'article-headline'}), + dict(attrs={'id' : 'article-header'}), + #dict(name='h1',attrs={'id' : 'article-header'}), dict(attrs={'class':['article-meta-author','article-meta-date','article main','art-o art-align-center otm-1 ']}), - dict(name='div',attrs={'class' : 'article-image full'}), - dict(attrs={'clas' : 'art-o art-align-center otm-1 '}), - dict(name='div',attrs={'class' : 'article main'}), + dict(name='div',attrs={'class' : 'article-image full'}), + dict(attrs={'clas' : 'art-o art-align-center otm-1 '}), + dict(name='div',attrs={'class' : 'article main'}), #dict(name='p') #dict(attrs={'id' : 'three-col'}) ] @@ -37,11 +41,9 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe): (u'Bloggs & Comments',u'http://www.birminghampost.net/comment/rss.xml') ] - extra_css = ''' - body {font: sans-serif medium;}' - h1 {text-align : center; font-family:Arial,Helvetica,sans-serif; font-size:20px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold;} - h2 {text-align : center;color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:15px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; } - span{ font-size:9.5px; font-weight:bold;font-style:italic} - p { text-align: justify; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal;} - - ''' + extra_css = ''' + h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;text-align:center;} + h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;} + p{font-family:Arial,Helvetica,sans-serif;font-size:small;} + body{font-family:Helvetica,Arial,sans-serif;font-size:small;} + ''' diff --git a/recipes/countryfile.recipe b/recipes/countryfile.recipe index 0502129791..71977048c7 100644 --- a/recipes/countryfile.recipe +++ b/recipes/countryfile.recipe @@ -1,12 +1,11 @@ from calibre import browser from calibre.web.feeds.news import BasicNewsRecipe - class AdvancedUserRecipe1325006965(BasicNewsRecipe): title = u'Countryfile.com' #cover_url = 'http://www.countryfile.com/sites/default/files/imagecache/160px_wide/cover/2_1.jpg' __author__ = 'Dave Asbury' description = 'The official website of Countryfile Magazine' - # last updated 15/4/12 + # last updated 9/9//12 language = 'en_GB' oldest_article = 30 max_articles_per_feed = 25 @@ -17,13 +16,14 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe): def get_cover_url(self): soup = self.index_to_soup('http://www.countryfile.com/') cov = soup.find(attrs={'class' : 'imagecache imagecache-160px_wide imagecache-linked imagecache-160px_wide_linked'}) - #print '******** ',cov,' ***' + print '******** ',cov,' ***' cov2 = str(cov) - cov2=cov2[124:-90] - #print '******** ',cov2,' ***' - + cov2=cov2[140:223] + print '******** ',cov2,' ***' + #cov2='http://www.countryfile.com/sites/default/files/imagecache/160px_wide/cover/1b_0.jpg' # try to get cover - if can't get known cover br = browser() + br.set_handle_redirect(False) try: br.open_novisit(cov2) diff --git a/recipes/history_today.recipe b/recipes/history_today.recipe new file mode 100644 index 0000000000..43adf7a358 --- /dev/null +++ b/recipes/history_today.recipe @@ -0,0 +1,87 @@ +import re +from calibre.web.feeds.recipes import BasicNewsRecipe +from collections import OrderedDict + +class HistoryToday(BasicNewsRecipe): + + title = 'History Today' + __author__ = 'Rick Shang' + + description = 'UK-based magazine, publishing articles and book reviews covering all types and periods of history.' + language = 'en' + category = 'news' + encoding = 'UTF-8' + + remove_tags = [dict(name='div',attrs={'class':['print-logo','print-site_name','print-breadcrumb']}), + dict(name='div', attrs={'id':['ht-tools','ht-tools2','ht-tags']})] + no_javascript = True + no_stylesheets = True + + + needs_subscription = True + + def get_browser(self): + br = BasicNewsRecipe.get_browser() + if self.username is not None and self.password is not None: + br.open('http://www.historytoday.com/user/login') + br.select_form(nr=1) + br['name'] = self.username + br['pass'] = self.password + res = br.submit() + raw = res.read() + if 'Session limit exceeded' in raw: + br.select_form(nr=1) + control=br.find_control('sid').items[1] + sid = [] + br['sid']=sid.join(control) + br.submit() + return br + + def parse_index(self): + + #Find date + soup0 = self.index_to_soup('http://www.historytoday.com/') + dates = self.tag_to_string(soup0.find('div',attrs={'id':'block-block-226'}).span) + self.timefmt = u' [%s]'%dates + + #Go to issue + soup = self.index_to_soup('http://www.historytoday.com/contents') + cover = soup.find('div',attrs={'id':'content-area'}).find('img')['src'] + self.cover_url=cover + + #Go to the main body + + div = soup.find ('div', attrs={'class':'region region-content-bottom'}) + + feeds = OrderedDict() + section_title = '' + for section in div.findAll('div', attrs={'id':re.compile("block\-views\-contents.*")}): + section_title = self.tag_to_string(section.find('h2',attrs={'class':'title'})) + sectionbody=section.find('div', attrs={'class':'view-content'}) + for article in sectionbody.findAll('div',attrs={'class':re.compile("views\-row.*")}): + articles = [] + subarticle = [] + subarticle = article.findAll('div') + if len(subarticle) < 2: + continue + title=self.tag_to_string(subarticle[0]) + originalurl="http://www.historytoday.com" + subarticle[0].span.a['href'].strip() + originalpage=self.index_to_soup(originalurl) + printurl=originalpage.find('div',attrs = {'id':'ht-tools'}).a['href'].strip() + url="http://www.historytoday.com" + printurl + desc=self.tag_to_string(subarticle[1]) + articles.append({'title':title, 'url':url, 'description':desc, 'date':''}) + + if articles: + if section_title not in feeds: + feeds[section_title] = [] + feeds[section_title] += articles + + + ans = [(key, val) for key, val in feeds.iteritems()] + return ans + + + def cleanup(self): + self.browser.open('http://www.historytoday.com/logout') + diff --git a/recipes/metro_uk.recipe b/recipes/metro_uk.recipe index 5b7b3a64ed..fcceba4ce7 100644 --- a/recipes/metro_uk.recipe +++ b/recipes/metro_uk.recipe @@ -1,10 +1,10 @@ from calibre.web.feeds.news import BasicNewsRecipe class AdvancedUserRecipe1306097511(BasicNewsRecipe): title = u'Metro UK' - description = 'Author Dave Asbury : News as provide by The Metro -UK' + description = 'Author Dave Asbury : News from The Metro - UK' #timefmt = '' __author__ = 'Dave Asbury' - #last update 4/8/12 + #last update 9/9/12 cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/276636_117118184990145_2132092232_n.jpg' no_stylesheets = True oldest_article = 1 @@ -17,23 +17,24 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe): language = 'en_GB' masthead_url = 'http://e-edition.metro.co.uk/images/metro_logo.gif' extra_css = ''' - h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:1.6em;} + h1{font-family:Arial,Helvetica,sans-serif; font-weight:900;font-size:1.6em;} h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:1.2em;} p{font-family:Arial,Helvetica,sans-serif;font-size:1.0em;} body{font-family:Helvetica,Arial,sans-serif;font-size:1.0em;} - ''' + ''' keep_only_tags = [ - #dict(name='h1'), - #dict(name='h2'), - #dict(name='div', attrs={'class' : ['row','article','img-cnt figure','clrd']}) - #dict(name='h3'), - #dict(attrs={'class' : 'BText'}), - ] + #dict(name='h1'), + #dict(name='h2'), + #dict(name='div', attrs={'class' : ['row','article','img-cnt figure','clrd']}) + #dict(name='h3'), + #dict(attrs={'class' : 'BText'}), + ] remove_tags = [ + dict(name='div',attrs={'class' : 'art-fd fd-gr1-b clrd'}), dict(name='span',attrs={'class' : 'share'}), - dict(name='li'), - dict(attrs={'class' : ['twitter-share-button','header-forms','hdr-lnks','close','art-rgt','fd-gr1-b clrd google-article','news m12 clrd clr-b p5t shareBtm','item-ds csl-3-img news','c-1of3 c-last','c-1of1','pd','item-ds csl-3-img sport']}), - dict(attrs={'id' : ['','sky-left','sky-right','ftr-nav','and-ftr','notificationList','logo','miniLogo','comments-news','metro_extras']}) + dict(name='li'), + dict(attrs={'class' : ['twitter-share-button','header-forms','hdr-lnks','close','art-rgt','fd-gr1-b clrd google-article','news m12 clrd clr-b p5t shareBtm','item-ds csl-3-img news','c-1of3 c-last','c-1of1','pd','item-ds csl-3-img sport']}), + dict(attrs={'id' : ['','sky-left','sky-right','ftr-nav','and-ftr','notificationList','logo','miniLogo','comments-news','metro_extras']}) ] remove_tags_before = dict(name='h1') #remove_tags_after = dict(attrs={'id':['topic-buttons']}) diff --git a/recipes/volksrant.recipe b/recipes/volksrant.recipe index 386cb1e729..b3629ee4e0 100644 --- a/recipes/volksrant.recipe +++ b/recipes/volksrant.recipe @@ -73,14 +73,20 @@ class AdvancedUserRecipe1249039563(BasicNewsRecipe): Change Log: Date: 10/15/2010 Feeds updated by Martin Tarenskeen + Date: 09/09/2012 + Feeds updated by Eric Lammerts ''' feeds = [ - (u'Laatste Nieuws', u'http://www.volkskrant.nl/rss/laatstenieuws.rss'), - (u'Binnenland', u'http://www.volkskrant.nl/rss/nederland.rss'), - (u'Buitenland', u'http://www.volkskrant.nl/rss/internationaal.rss'), - (u'Economie', u'http://www.volkskrant.nl/rss/economie.rss'), - (u'Sport', u'http://www.volkskrant.nl/rss/sport.rss'), - (u'Cultuur', u'http://www.volkskrant.nl/rss/kunst.rss'), - (u'Gezondheid & Wetenschap', u'http://www.volkskrant.nl/rss/wetenschap.rss'), - (u'Internet & Media', u'http://www.volkskrant.nl/rss/media.rss') ] + (u'Nieuws', u'http://www.volkskrant.nl/nieuws/rss.xml'), + (u'Binnenland', u'http://www.volkskrant.nl/nieuws/binnenland/rss.xml'), + (u'Buitenland', u'http://www.volkskrant.nl/buitenland/rss.xml'), + (u'Economie', u'http://www.volkskrant.nl/nieuws/economie/rss.xml'), + (u'Politiek', u'http://www.volkskrant.nl/politiek/rss.xml'), + (u'Sport', u'http://www.volkskrant.nl/sport/rss.xml'), + (u'Cultuur', u'http://www.volkskrant.nl/nieuws/cultuur/rss.xml'), + (u'Gezondheid & wetenschap', u'http://www.volkskrant.nl/nieuws/gezondheid--wetenschap/rss.xml'), + (u'Tech & Media', u'http://www.volkskrant.nl/tech-media/rss.xml'), + (u'Reizen', u'http://www.volkskrant.nl/nieuws/reizen/rss.xml'), + (u'Opinie', u'http://www.volkskrant.nl/opinie/rss.xml'), + (u'Opmerkelijk', u'http://www.volkskrant.nl/nieuws/opmerkelijk/rss.xml') ] diff --git a/recipes/zeitde_sub.recipe b/recipes/zeitde_sub.recipe index dfa52e8504..b22e9793ed 100644 --- a/recipes/zeitde_sub.recipe +++ b/recipes/zeitde_sub.recipe @@ -118,13 +118,13 @@ class ZeitEPUBAbo(BasicNewsRecipe): def build_index(self): domain = "https://premium.zeit.de" - url = domain + "/abo/zeit_digital" + url = domain + "/abo/digitalpaket" browser = self.get_browser() # new login process response = browser.open(url) # Get rid of nested form - response.set_data(response.get_data().replace('