diff --git a/recipes/arizona_republic.recipe b/recipes/arizona_republic.recipe new file mode 100644 index 0000000000..5bc2140946 --- /dev/null +++ b/recipes/arizona_republic.recipe @@ -0,0 +1,68 @@ +__license__ = 'GPL v3' +__copyright__ = '2010, jolo' +''' +azrepublic.com +''' +from calibre.web.feeds.recipes import BasicNewsRecipe + +class AdvancedUserRecipe1307301031(BasicNewsRecipe): + title = u'AZRepublic' + __author__ = 'Jim Olo' + language = 'en' + description = "The Arizona Republic is Arizona's leading provider of news and information, and has published a daily newspaper in Phoenix for more than 110 years" + publisher = 'AZRepublic/AZCentral' + masthead_url = 'http://freedom2t.com/wp-content/uploads/press_az_republic_v2.gif' + cover_url = 'http://www.valleyleadership.org/Common/Img/2line4c_AZRepublic%20with%20azcentral%20logo.jpg' + category = 'news, politics, USA, AZ, Arizona' + + oldest_article = 7 + max_articles_per_feed = 100 + remove_empty_feeds = True + no_stylesheets = True + remove_javascript = True +# extra_css = '.headline {font-size: medium;} \n .fact { padding-top: 10pt }' + extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .headline {font-size: medium} .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} ' + + remove_attributes = ['width','height','h2','subHeadline','style'] + remove_tags = [ + dict(name='div', attrs={'id':['slidingBillboard', 'top728x90', 'subindex-header', 'topSearch']}), + dict(name='div', attrs={'id':['simplesearch', 'azcLoginBox', 'azcLoginBoxInner', 'topNav']}), + dict(name='div', attrs={'id':['carsDrop', 'homesDrop', 'rentalsDrop', 'classifiedDrop']}), + dict(name='div', attrs={'id':['nav', 'mp', 'subnav', 'jobsDrop']}), + dict(name='h6', attrs={'class':['section-header']}), + dict(name='a', attrs={'href':['#comments']}), + dict(name='div', 
attrs={'class':['articletools clearfix', 'floatRight']}), + dict(name='div', attrs={'id':['fbFrame', 'ob', 'storyComments', 'storyGoogleAdBox']}), + dict(name='div', attrs={'id':['storyTopHomes', 'openRight', 'footerwrap', 'copyright']}), + dict(name='div', attrs={'id':['blogsHed', 'blog_comments', 'blogByline','blogTopics']}), + dict(name='div', attrs={'id':['membersRightMain', 'dealsfooter', 'azrTopHed', 'azrRightCol']}), + dict(name='div', attrs={'id':['ttdHeader', 'ttdTimeWeather']}), + dict(name='div', attrs={'id':['membersRightMain', 'deals-header-wrap']}), + dict(name='div', attrs={'id':['todoTopSearchBar', 'byline clearfix', 'subdex-topnav']}), + dict(name='h1', attrs={'id':['SEOtext']}), + dict(name='table', attrs={'class':['ap-mediabox-table']}), + dict(name='p', attrs={'class':['ap_para']}), + dict(name='span', attrs={'class':['source-org vcard', 'org fn']}), + dict(name='a', attrs={'href':['http://hosted2.ap.org/APDEFAULT/privacy']}), + dict(name='a', attrs={'href':['http://hosted2.ap.org/APDEFAULT/terms']}), + dict(name='div', attrs={'id':['onespot_nextclick']}), + ] + + feeds = [ + (u'FrontPage', u'http://www.azcentral.com/rss/feeds/republicfront.xml'), + (u'TopUS-News', u'http://hosted.ap.org/lineups/USHEADS.rss?SITE=AZPHG&SECTION=HOME'), + (u'WorldNews', u'http://hosted.ap.org/lineups/WORLDHEADS.rss?SITE=AZPHG&SECTION=HOME'), + (u'TopBusiness', u'http://hosted.ap.org/lineups/BUSINESSHEADS.rss?SITE=AZPHG&SECTION=HOME'), + (u'Entertainment', u'http://hosted.ap.org/lineups/ENTERTAINMENT.rss?SITE=AZPHG&SECTION=HOME'), + (u'ArizonaNews', u'http://www.azcentral.com/rss/feeds/news.xml'), + (u'Gilbert', u'http://www.azcentral.com/rss/feeds/gilbert.xml'), + (u'Chandler', u'http://www.azcentral.com/rss/feeds/chandler.xml'), + (u'DiningReviews', u'http://www.azcentral.com/rss/feeds/diningreviews.xml'), + (u'AZBusiness', u'http://www.azcentral.com/rss/feeds/business.xml'), + (u'ArizonaDeals', u'http://www.azcentral.com/members/Blog%7E/RealDealsblog'), + 
(u'GroceryDeals', u'http://www.azcentral.com/members/Blog%7E/RealDealsblog/tag/2646') + ] + + + + diff --git a/recipes/athens_news.recipe b/recipes/athens_news.recipe new file mode 100644 index 0000000000..6667faaf0c --- /dev/null +++ b/recipes/athens_news.recipe @@ -0,0 +1,70 @@ +__license__ = 'GPL v3' +__copyright__ = '2011, Darko Miletic ' +''' +www.athensnews.gr +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class AthensNews(BasicNewsRecipe): + title = 'Athens News' + __author__ = 'Darko Miletic' + description = 'Greece in English since 1952' + publisher = 'NEP Publishing Company SA' + category = 'news, politics, Greece, Athens' + oldest_article = 1 + max_articles_per_feed = 200 + no_stylesheets = True + encoding = 'utf8' + use_embedded_content = False + language = 'en_GR' + remove_empty_feeds = True + publication_type = 'newspaper' + masthead_url = 'http://www.athensnews.gr/sites/athensnews/themes/athensnewsv3/images/logo.jpg' + extra_css = """ + body{font-family: Arial,Helvetica,sans-serif } + img{margin-bottom: 0.4em; display:block} + .big{font-size: xx-large; font-family: Georgia,serif} + .articlepubdate{font-size: small; color: gray; font-family: Georgia,serif} + .lezanta{font-size: x-small; font-weight: bold; text-align: left; margin-bottom: 1em; display: block} + """ + + conversion_options = { + 'comment' : description + , 'tags' : category + , 'publisher' : publisher + , 'language' : language + , 'linearize_tables' : True + } + + remove_tags = [ + dict(name=['meta','link']) + ] + keep_only_tags=[ + dict(name='span',attrs={'class':'big'}) + ,dict(name='td', attrs={'class':['articlepubdate','text']}) + ] + remove_attributes=['lang'] + + + feeds = [ + (u'News' , u'http://www.athensnews.gr/category/1/feed' ) + ,(u'Politics' , u'http://www.athensnews.gr/category/8/feed' ) + ,(u'Business' , u'http://www.athensnews.gr/category/2/feed' ) + ,(u'Economy' , u'http://www.athensnews.gr/category/11/feed') + ,(u'Community' , 
u'http://www.athensnews.gr/category/5/feed' ) + ,(u'Arts' , u'http://www.athensnews.gr/category/3/feed' ) + ,(u'Living in Athens', u'http://www.athensnews.gr/category/7/feed' ) + ,(u'Sports' , u'http://www.athensnews.gr/category/4/feed' ) + ,(u'Travel' , u'http://www.athensnews.gr/category/6/feed' ) + ,(u'Letters' , u'http://www.athensnews.gr/category/44/feed') + ,(u'Media' , u'http://www.athensnews.gr/multimedia/feed' ) + ] + + def print_version(self, url): + return url + '?action=print' + + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + return soup diff --git a/recipes/buenosaireseconomico.recipe b/recipes/buenosaireseconomico.recipe index 782358e6d3..2de02c7c10 100644 --- a/recipes/buenosaireseconomico.recipe +++ b/recipes/buenosaireseconomico.recipe @@ -1,72 +1,60 @@ -#!/usr/bin/env python - __license__ = 'GPL v3' -__copyright__ = '2009, Darko Miletic ' +__copyright__ = '2009-2011, Darko Miletic ' ''' -elargentino.com +www.diariobae.com ''' - +from calibre import strftime from calibre.web.feeds.news import BasicNewsRecipe from calibre.ebooks.BeautifulSoup import Tag class BsAsEconomico(BasicNewsRecipe): title = 'Buenos Aires Economico' __author__ = 'Darko Miletic' - description = 'Revista Argentina' - publisher = 'ElArgentino.com' + description = 'Diario BAE es el diario economico-politico con mas influencia en la Argentina. Fuente de empresarios y politicos del pais y el exterior. El pozo estaria aportando en periodos breves un volumen equivalente a 800m3 diarios. Pero todavia deben efectuarse otras perforaciones adicionales.' 
+ publisher = 'Diario BAE' category = 'news, politics, economy, Argentina' oldest_article = 2 max_articles_per_feed = 100 no_stylesheets = True use_embedded_content = False encoding = 'utf-8' - language = 'es_AR' + language = 'es_AR' + cover_url = strftime('http://www.diariobae.com/imgs_portadas/%Y%m%d_portadasBAE.jpg') + masthead_url = 'http://www.diariobae.com/img/logo_bae.png' + remove_empty_feeds = True + publication_type = 'newspaper' + extra_css = """ + body{font-family: Georgia,"Times New Roman",Times,serif} + #titulo{font-size: x-large} + #epi{font-size: small; font-style: italic; font-weight: bold} + img{display: block; margin-top: 1em} + """ + conversion_options = { + 'comment' : description + , 'tags' : category + , 'publisher' : publisher + , 'language' : language + } - lang = 'es-AR' - direction = 'ltr' - INDEX = 'http://www.elargentino.com/medios/121/Buenos-Aires-Economico.html' - extra_css = ' .titulo{font-size: x-large; font-weight: bold} .volantaImp{font-size: small; font-weight: bold} ' - - html2lrf_options = [ - '--comment' , description - , '--category' , category - , '--publisher', publisher + remove_tags_before= dict(attrs={'id':'titulo'}) + remove_tags_after = dict(attrs={'id':'autor' }) + remove_tags = [ + dict(name=['meta','base','iframe','link','lang']) + ,dict(attrs={'id':'barra_tw'}) ] - - html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\noverride_css=" p {text-indent: 0cm; margin-top: 0em; margin-bottom: 0.5em} "' - - keep_only_tags = [dict(name='div', attrs={'class':'ContainerPop'})] - - remove_tags = [dict(name='link')] - - feeds = [(u'Articulos', u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=121&Content-Type=text/xml&ChannelDesc=Buenos%20Aires%20Econ%C3%B3mico')] - - def print_version(self, url): - main, sep, article_part = url.partition('/nota-') - article_id, rsep, rrest = article_part.partition('-') - return 
u'http://www.elargentino.com/Impresion.aspx?Id=' + article_id + remove_attributes = ['data-count','data-via'] + + feeds = [ + (u'Argentina' , u'http://www.diariobae.com/rss/argentina.xml' ) + ,(u'Valores' , u'http://www.diariobae.com/rss/valores.xml' ) + ,(u'Finanzas' , u'http://www.diariobae.com/rss/finanzas.xml' ) + ,(u'Negocios' , u'http://www.diariobae.com/rss/negocios.xml' ) + ,(u'Mundo' , u'http://www.diariobae.com/rss/mundo.xml' ) + ,(u'5 dias' , u'http://www.diariobae.com/rss/5dias.xml' ) + ,(u'Espectaculos', u'http://www.diariobae.com/rss/espectaculos.xml') + ] def preprocess_html(self, soup): for item in soup.findAll(style=True): del item['style'] - soup.html['lang'] = self.lang - soup.html['dir' ] = self.direction - mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)]) - mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")]) - soup.head.insert(0,mlang) - soup.head.insert(1,mcharset) return soup - - def get_cover_url(self): - cover_url = None - soup = self.index_to_soup(self.INDEX) - cover_item = soup.find('div',attrs={'class':'colder'}) - if cover_item: - clean_url = self.image_url_processor(None,cover_item.div.img['src']) - cover_url = 'http://www.elargentino.com' + clean_url + '&height=600' - return cover_url - - def image_url_processor(self, baseurl, url): - base, sep, rest = url.rpartition('?Id=') - img, sep2, rrest = rest.partition('&') - return base + sep + img diff --git a/recipes/catholic_news_agency.recipe b/recipes/catholic_news_agency.recipe new file mode 100644 index 0000000000..43b7755f07 --- /dev/null +++ b/recipes/catholic_news_agency.recipe @@ -0,0 +1,13 @@ +from calibre.web.feeds.news import BasicNewsRecipe + +class AdvancedUserRecipe1301972345(BasicNewsRecipe): + title = u'Catholic News Agency' + language = 'en' + __author__ = 'Jetkey' + oldest_article = 5 + max_articles_per_feed = 20 + + feeds = [(u'U.S. 
News', u'http://feeds.feedburner.com/catholicnewsagency/dailynews-us'), + (u'Vatican', u'http://feeds.feedburner.com/catholicnewsagency/dailynews-vatican'), + (u'Bishops Corner', u'http://feeds.feedburner.com/catholicnewsagency/columns/bishopscorner'), + (u'Saint of the Day', u'http://feeds.feedburner.com/catholicnewsagency/saintoftheday')] diff --git a/recipes/criticadigital.recipe b/recipes/criticadigital.recipe deleted file mode 100644 index 3cb72e6be4..0000000000 --- a/recipes/criticadigital.recipe +++ /dev/null @@ -1,69 +0,0 @@ -#!/usr/bin/env python - -__license__ = 'GPL v3' -__copyright__ = '2008, Darko Miletic ' -''' -criticadigital.com -''' - -from calibre.web.feeds.news import BasicNewsRecipe - -class CriticaDigital(BasicNewsRecipe): - title = 'Critica de la Argentina' - __author__ = 'Darko Miletic and Sujata Raman' - description = 'Noticias de Argentina' - oldest_article = 2 - max_articles_per_feed = 100 - language = 'es_AR' - - no_stylesheets = True - use_embedded_content = False - encoding = 'cp1252' - - extra_css = ''' - h1{font-family:"Trebuchet MS";} - h3{color:#9A0000; font-family:Tahoma; font-size:x-small;} - h2{color:#504E53; font-family:Arial,Helvetica,sans-serif ;font-size:small;} - #epigrafe{font-family:Arial,Helvetica,sans-serif ;color:#666666 ; font-size:x-small;} - p {font-family:Arial,Helvetica,sans-serif;} - #fecha{color:#858585; font-family:Tahoma; font-size:x-small;} - #autor{color:#858585; font-family:Tahoma; font-size:x-small;} - #hora{color:#F00000;font-family:Tahoma; font-size:x-small;} - ''' - keep_only_tags = [ - dict(name='div', attrs={'class':['bloqueTitulosNoticia','cfotonota']}) - ,dict(name='div', attrs={'id':'boxautor'}) - ,dict(name='p', attrs={'id':'textoNota'}) - ] - - remove_tags = [ - dict(name='div', attrs={'class':'box300' }) - ,dict(name='div', style=True ) - ,dict(name='div', attrs={'class':'titcomentario'}) - ,dict(name='div', attrs={'class':'comentario' }) - ,dict(name='div', attrs={'class':'paginador' }) - ] - - 
feeds = [ - (u'Politica', u'http://www.criticadigital.com/herramientas/rss.php?ch=politica' ) - ,(u'Economia', u'http://www.criticadigital.com/herramientas/rss.php?ch=economia' ) - ,(u'Deportes', u'http://www.criticadigital.com/herramientas/rss.php?ch=deportes' ) - ,(u'Espectaculos', u'http://www.criticadigital.com/herramientas/rss.php?ch=espectaculos') - ,(u'Mundo', u'http://www.criticadigital.com/herramientas/rss.php?ch=mundo' ) - ,(u'Policiales', u'http://www.criticadigital.com/herramientas/rss.php?ch=policiales' ) - ,(u'Sociedad', u'http://www.criticadigital.com/herramientas/rss.php?ch=sociedad' ) - ,(u'Salud', u'http://www.criticadigital.com/herramientas/rss.php?ch=salud' ) - ,(u'Tecnologia', u'http://www.criticadigital.com/herramientas/rss.php?ch=tecnologia' ) - ,(u'Santa Fe', u'http://www.criticadigital.com/herramientas/rss.php?ch=santa_fe' ) - ] - - def get_cover_url(self): - cover_url = None - index = 'http://www.criticadigital.com/impresa/' - soup = self.index_to_soup(index) - link_item = soup.find('div',attrs={'class':'tapa'}) - if link_item: - cover_url = index + link_item.img['src'] - return cover_url - - diff --git a/recipes/elcronista.recipe b/recipes/elcronista.recipe index 93615f8f42..f8da81c4bb 100644 --- a/recipes/elcronista.recipe +++ b/recipes/elcronista.recipe @@ -1,72 +1,59 @@ -#!/usr/bin/env python - __license__ = 'GPL v3' -__copyright__ = '2008, Darko Miletic ' +__copyright__ = '2008-2011, Darko Miletic ' ''' -cronista.com +www.cronista.com ''' from calibre.web.feeds.news import BasicNewsRecipe -class ElCronista(BasicNewsRecipe): - title = 'El Cronista' +class Pagina12(BasicNewsRecipe): + title = 'El Cronista Comercial' __author__ = 'Darko Miletic' - description = 'Noticias de Argentina' + description = 'El Cronista Comercial es el Diario economico-politico mas valorado. Es la fuente mas confiable de informacion en temas de economia, finanzas y negocios enmarcados politicamente.' 
+ publisher = 'Cronista.com' + category = 'news, politics, economy, finances, Argentina' oldest_article = 2 - language = 'es_AR' - - max_articles_per_feed = 100 + max_articles_per_feed = 200 no_stylesheets = True + encoding = 'utf8' use_embedded_content = False - encoding = 'cp1252' + language = 'es_AR' + remove_empty_feeds = True + publication_type = 'newspaper' + masthead_url = 'http://www.cronista.com/export/sites/diarioelcronista/arte/header-logo.gif' + extra_css = """ + body{font-family: Arial,Helvetica,sans-serif } + h2{font-family: Georgia,"Times New Roman",Times,serif } + img{margin-bottom: 0.4em; display:block} + .nom{font-weight: bold; vertical-align: baseline} + .autor-cfoto{border-bottom: 1px solid #D2D2D2; + border-top: 1px solid #D2D2D2; + display: inline-block; + margin: 0 10px 10px 0; + padding: 10px; + width: 210px} + .under{font-weight: bold} + .time{font-size: small} + """ - html2lrf_options = [ - '--comment' , description - , '--category' , 'news, Argentina' - , '--publisher' , title - ] + conversion_options = { + 'comment' : description + , 'tags' : category + , 'publisher' : publisher + , 'language' : language + } - keep_only_tags = [ - dict(name='table', attrs={'width':'100%' }) - ,dict(name='h1' , attrs={'class':'Arialgris16normal'}) - ] + remove_tags = [ + dict(name=['meta','link','base','iframe','object','embed']) + ,dict(attrs={'class':['user-tools','tabsmedia']}) + ] + remove_attributes = ['lang'] + remove_tags_before = dict(attrs={'class':'top'}) + remove_tags_after = dict(attrs={'class':'content-nota'}) + feeds = [(u'Ultimas noticias', u'http://www.cronista.com/rss.html')] - remove_tags = [dict(name='a', attrs={'class':'Arialazul12'})] - - feeds = [ - (u'Economia' , u'http://www.cronista.com/adjuntos/8/rss/Economia_EI.xml' ) - ,(u'Negocios' , u'http://www.cronista.com/adjuntos/8/rss/negocios_EI.xml' ) - ,(u'Ultimo momento' , u'http://www.cronista.com/adjuntos/8/rss/ultimo_momento.xml' ) - ,(u'Finanzas y Mercados' , 
u'http://www.cronista.com/adjuntos/8/rss/Finanzas_Mercados_EI.xml' ) - ,(u'Financial Times' , u'http://www.cronista.com/adjuntos/8/rss/FT_EI.xml' ) - ,(u'Opinion edicion impresa' , u'http://www.cronista.com/adjuntos/8/rss/opinion_edicion_impresa.xml' ) - ,(u'Socialmente Responsables', u'http://www.cronista.com/adjuntos/8/rss/Socialmente_Responsables.xml') - ,(u'Asuntos Legales' , u'http://www.cronista.com/adjuntos/8/rss/asuntoslegales.xml' ) - ,(u'IT Business' , u'http://www.cronista.com/adjuntos/8/rss/itbusiness.xml' ) - ,(u'Management y RR.HH.' , u'http://www.cronista.com/adjuntos/8/rss/management.xml' ) - ,(u'Inversiones Personales' , u'http://www.cronista.com/adjuntos/8/rss/inversionespersonales.xml' ) - ] - - def print_version(self, url): - main, sep, rest = url.partition('.com/notas/') - article_id, lsep, rrest = rest.partition('-') - return 'http://www.cronista.com/interior/index.php?p=imprimir_nota&idNota=' + article_id def preprocess_html(self, soup): - mtag = '' - soup.head.insert(0,mtag) - soup.head.base.extract() - htext = soup.find('h1',attrs={'class':'Arialgris16normal'}) - htext.name = 'p' - soup.prettify() + for item in soup.findAll(style=True): + del item['style'] return soup - - def get_cover_url(self): - cover_url = None - index = 'http://www.cronista.com/contenidos/' - soup = self.index_to_soup(index + 'ee.html') - link_item = soup.find('a',attrs={'href':"javascript:Close()"}) - if link_item: - cover_url = index + link_item.img['src'] - return cover_url - diff --git a/recipes/icons/athens_news.png b/recipes/icons/athens_news.png new file mode 100644 index 0000000000..499a11dbe2 Binary files /dev/null and b/recipes/icons/athens_news.png differ diff --git a/recipes/icons/buenosaireseconomico.png b/recipes/icons/buenosaireseconomico.png new file mode 100644 index 0000000000..d84f7483ae Binary files /dev/null and b/recipes/icons/buenosaireseconomico.png differ diff --git a/recipes/icons/elcronista.png b/recipes/icons/elcronista.png index 
0be856345e..ca64756de1 100644 Binary files a/recipes/icons/elcronista.png and b/recipes/icons/elcronista.png differ diff --git a/recipes/infobae.recipe b/recipes/infobae.recipe index 9553746449..b577988347 100644 --- a/recipes/infobae.recipe +++ b/recipes/infobae.recipe @@ -1,5 +1,5 @@ __license__ = 'GPL v3' -__copyright__ = '2008-2010, Darko Miletic ' +__copyright__ = '2008-2011, Darko Miletic ' ''' infobae.com ''' @@ -9,7 +9,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class Infobae(BasicNewsRecipe): title = 'Infobae.com' __author__ = 'Darko Miletic and Sujata Raman' - description = 'Informacion Libre las 24 horas' + description = 'Infobae.com es el sitio de noticias con mayor actualizacion de Latinoamérica. Noticias actualizadas las 24 horas, los 365 días del año.' publisher = 'Infobae.com' category = 'news, politics, Argentina' oldest_article = 1 @@ -17,13 +17,13 @@ class Infobae(BasicNewsRecipe): no_stylesheets = True use_embedded_content = False language = 'es_AR' - encoding = 'cp1252' - masthead_url = 'http://www.infobae.com/imgs/header/header.gif' - remove_javascript = True + encoding = 'utf8' + masthead_url = 'http://www.infobae.com/media/img/static/logo-infobae.gif' remove_empty_feeds = True extra_css = ''' - body{font-family:Arial,Helvetica,sans-serif;} - .popUpTitulo{color:#0D4261; font-size: xx-large} + body{font-family: Arial,Helvetica,sans-serif} + img{display: block} + .categoria{font-size: small; text-transform: uppercase} ''' conversion_options = { @@ -31,26 +31,44 @@ class Infobae(BasicNewsRecipe): , 'tags' : category , 'publisher' : publisher , 'language' : language - , 'linearize_tables' : True } - - + + keep_only_tags = [dict(attrs={'class':['titularnota','nota','post-title','post-entry','entry-title','entry-info','entry-content']})] + remove_tags_after = dict(attrs={'class':['interior-noticia','nota-desc','tags']}) + remove_tags = [ + dict(name=['base','meta','link','iframe','object','embed','ins']) + 
,dict(attrs={'class':['barranota','tags']}) + ] + feeds = [ - (u'Noticias' , u'http://www.infobae.com/adjuntos/html/RSS/hoy.xml' ) - ,(u'Salud' , u'http://www.infobae.com/adjuntos/html/RSS/salud.xml' ) - ,(u'Tecnologia', u'http://www.infobae.com/adjuntos/html/RSS/tecnologia.xml') - ,(u'Deportes' , u'http://www.infobae.com/adjuntos/html/RSS/deportes.xml' ) + (u'Saludable' , u'http://www.infobae.com/rss/saludable.xml') + ,(u'Economia' , u'http://www.infobae.com/rss/economia.xml' ) + ,(u'En Numeros', u'http://www.infobae.com/rss/rating.xml' ) + ,(u'Finanzas' , u'http://www.infobae.com/rss/finanzas.xml' ) + ,(u'Mundo' , u'http://www.infobae.com/rss/mundo.xml' ) + ,(u'Sociedad' , u'http://www.infobae.com/rss/sociedad.xml' ) + ,(u'Politica' , u'http://www.infobae.com/rss/politica.xml' ) + ,(u'Deportes' , u'http://www.infobae.com/rss/deportes.xml' ) ] - def print_version(self, url): - article_part = url.rpartition('/')[2] - article_id= article_part.partition('-')[0] - return 'http://www.infobae.com/notas/nota_imprimir.php?Idx=' + article_id - - def postprocess_html(self, soup, first): - for tag in soup.findAll(name='strong'): - tag.name = 'b' + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + for item in soup.findAll('a'): + limg = item.find('img') + if item.string is not None: + str = item.string + item.replaceWith(str) + else: + if limg: + item.name = 'div' + item.attrs = [] + else: + str = self.tag_to_string(item) + item.replaceWith(str) + for item in soup.findAll('img'): + if not item.has_key('alt'): + item['alt'] = 'image' return soup - diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py index 333a5baaa4..4858b585ae 100644 --- a/src/calibre/customize/builtins.py +++ b/src/calibre/customize/builtins.py @@ -611,7 +611,7 @@ from calibre.devices.teclast.driver import (TECLAST_K3, NEWSMY, IPAPYRUS, from calibre.devices.sne.driver import SNE from calibre.devices.misc import (PALMPRE, AVANT, SWEEX, 
PDNOVEL, GEMEI, VELOCITYMICRO, PDNOVEL_KOBO, LUMIREAD, ALURATEK_COLOR, - TREKSTOR, EEEREADER, NEXTBOOK, ADAM) + TREKSTOR, EEEREADER, NEXTBOOK, ADAM, MOOVYBOOK) from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG from calibre.devices.kobo.driver import KOBO from calibre.devices.bambook.driver import BAMBOOK @@ -746,6 +746,7 @@ plugins += [ EEEREADER, NEXTBOOK, ADAM, + MOOVYBOOK, ITUNES, BOEYE_BEX, BOEYE_BDX, @@ -1382,7 +1383,7 @@ class StoreOpenBooksStore(StoreBase): name = 'Open Books' description = u'Comprehensive listing of DRM free ebooks from a variety of sources provided by users of calibre.' actual_plugin = 'calibre.gui2.store.stores.open_books_plugin:OpenBooksStore' - + drm_free_only = True headquarters = 'US' diff --git a/src/calibre/db/tables.py b/src/calibre/db/tables.py index 735d2f69a0..edca43528a 100644 --- a/src/calibre/db/tables.py +++ b/src/calibre/db/tables.py @@ -48,6 +48,12 @@ class Table(object): class OneToOneTable(Table): + ''' + Represents data that is unique per book (it may not actually be unique) but + each item is assigned to a book in a one-to-one mapping. For example: uuid, + timestamp, size, etc. + ''' + def read(self, db): self.book_col_map = {} idcol = 'id' if self.metadata['table'] == 'books' else 'book' @@ -66,6 +72,13 @@ class SizeTable(OneToOneTable): class ManyToOneTable(Table): + ''' + Represents data where one data item can map to many books, for example: + series or publisher. + + Each book however has only one value for data of this type. + ''' + def read(self, db): self.id_map = {} self.extra_map = {} @@ -91,6 +104,12 @@ class ManyToOneTable(Table): class ManyToManyTable(ManyToOneTable): + ''' + Represents data that has a many-to-many mapping with books. i.e. each book + can have more than one value and each value can be mapped to more than one + book. For example: tags or authors. 
+ ''' + def read_maps(self, db): for row in db.conn.execute( 'SELECT book, {0} FROM books_{1}_link'.format( diff --git a/src/calibre/devices/misc.py b/src/calibre/devices/misc.py index 2a6a76719d..6c5706f039 100644 --- a/src/calibre/devices/misc.py +++ b/src/calibre/devices/misc.py @@ -329,3 +329,25 @@ class NEXTBOOK(USBMS): f.write(metadata.thumbnail[-1]) ''' +class MOOVYBOOK(USBMS): + + name = 'Moovybook device interface' + gui_name = 'Moovybook' + description = _('Communicate with the Moovybook Reader') + author = 'Kovid Goyal' + supported_platforms = ['windows', 'osx', 'linux'] + + # Ordered list of supported formats + FORMATS = ['epub', 'txt', 'pdf'] + + VENDOR_ID = [0x1cae] + PRODUCT_ID = [0x9b08] + BCD = [0x02] + + EBOOK_DIR_MAIN = '' + + SUPPORTS_SUB_DIRS = True + + def get_main_ebook_dir(self, for_upload=False): + return 'Books' if for_upload else self.EBOOK_DIR_MAIN + diff --git a/src/calibre/manual/develop.rst b/src/calibre/manual/develop.rst index fecdf28a47..506615914c 100644 --- a/src/calibre/manual/develop.rst +++ b/src/calibre/manual/develop.rst @@ -187,6 +187,26 @@ in your favorite editor and add the line:: near the top of the file. Now run the command :command:`calibredb`. The very first line of output should be ``Hello, world!``. +Having separate "normal" and "development" |app| installs on the same computer +------------------------------------------------------------------------------- + +The calibre source tree is very stable, it rarely breaks, but if you feel the need to run from source on a separate +test library and run the released calibre version with your everyday library, you can achieve this easily using +.bat files or shell scripts to launch |app|. 
The example below shows how to do this on windows using .bat files (the +instructions for other platforms are the same, just use a bash script instead of a .bat file) + +To launch the release version of |app| with your everyday library: + +calibre-normal.bat:: + + calibre.exe "--with-library=C:\path\to\everyday\library folder" + +calibre-dev.bat:: + + set CALIBRE_DEVELOP_FROM=C:\path\to\calibre\checkout\src + calibre.exe "--with-library=C:\path\to\test\library folder" + + Debugging tips ---------------- diff --git a/src/calibre/manual/gui.rst b/src/calibre/manual/gui.rst index f4b04f6e9d..e5e789d9dd 100644 --- a/src/calibre/manual/gui.rst +++ b/src/calibre/manual/gui.rst @@ -164,13 +164,16 @@ Library .. |lii| image:: images/library.png :class: float-right-img -|lii| The :guilabel: `Library` action allows you to create, switch between, rename or delete a Library. |app| allows you to create as many libraries as you wish. You could for instance create a fiction library, a non fiction library, a foreign language library a project library, basically any structure that suits your needs. Libraries are the highest organizational structure within |app|, each library has its own set of books, tags, categories and base storage location. +|lii| The :guilabel:`Library` action allows you to create, switch between, rename or delete a Library. |app| allows you to create as many libraries as you wish. You could for instance create a fiction library, a non fiction library, a foreign language library, a project library, basically any structure that suits your needs. Libraries are the highest organizational structure within |app|, each library has its own set of books, tags, categories and base storage location. - 1. **Switch\Create library..**: This action allows you to; a) connect to a pre-existing |app| library at another location from your currently open library, b) Create and empty library at a nw location or, c) Move the current Library to a newly specified location. - 2. 
**Quick Switch>**: This action allows you to switch between libraries that have been registered or created within |app|. - 3. **Rename Library>**: This action allows you to rename a Library. - 4. **Delete Library>**: This action allows you to **permanenetly delete** a Library. - 5. ****: Actions 5, 6 etc .. give you immediate switch access between multiple Libraries that you have created or attached to. + 1. **Switch/Create library**: This action allows you to; a) connect to a pre-existing |app| library at another location from your currently open library, b) Create an empty library at a new location or, c) Move the current Library to a newly specified location. + 2. **Quick Switch**: This action allows you to switch between libraries that have been registered or created within |app|. + 3. **Rename Library**: This action allows you to rename a Library. + 4. **Remove Library**: This action allows you to unregister a library from |app|. + 5. ****: Actions 5, 6 etc .. give you immediate switch access between multiple Libraries that you have created or attached to. This list contains only the 5 most frequently used libraries. For the complete list, use the Quick Switch menu. + 6. **Library Maintenance**: This action allows you to check the current library for data consistency issues and restore the current library's database from backups. + +.. note:: Metadata about your ebooks like title/author/tags/etc. is stored in a single file in your |app| library folder called metadata.db. If this file gets corrupted (a very rare event), you can lose the metadata. Fortunately, |app| automatically backs up the metadata for every individual book in the book's folder as an .opf file. By using the Restore Library action under Library Maintenance described above, you can have |app| rebuild the metadata.db file from the individual .opf files for you. .. 
_device: diff --git a/src/calibre/utils/localization.py b/src/calibre/utils/localization.py index f14858c3b6..cdb20b4d6e 100644 --- a/src/calibre/utils/localization.py +++ b/src/calibre/utils/localization.py @@ -109,6 +109,7 @@ _extra_lang_codes = { 'en_AU' : _('English (Australia)'), 'en_NZ' : _('English (New Zealand)'), 'en_CA' : _('English (Canada)'), + 'en_GR' : _('English (Greece)'), 'en_IN' : _('English (India)'), 'en_TH' : _('English (Thailand)'), 'en_CY' : _('English (Cyprus)'),