diff --git a/src/calibre/trac/plugins/download.py b/src/calibre/trac/plugins/download.py index 03a6676e7b..dd25279071 100644 --- a/src/calibre/trac/plugins/download.py +++ b/src/calibre/trac/plugins/download.py @@ -20,7 +20,7 @@ DEPENDENCIES = [ ('BeautifulSoup', '3.0.5', 'beautifulsoup', 'python-beautifulsoup', 'python-BeautifulSoup'), ('dnspython', '1.6.0', 'dnspython', 'dnspython', 'dnspython', 'dnspython'), ('poppler', '0.10.5', 'poppler', 'poppler', 'poppler', 'poppler'), - ('pdftk', '1.12', 'pdftk', 'pdftk', 'pdftk', 'pdftk'), + ('podofo', '0.7', 'podofo', 'podofo', 'podofo', 'podofo'), ] diff --git a/src/calibre/trac/plugins/templates/linux.html b/src/calibre/trac/plugins/templates/linux.html index 96881aa108..ffaa1d8394 100644 --- a/src/calibre/trac/plugins/templates/linux.html +++ b/src/calibre/trac/plugins/templates/linux.html @@ -49,7 +49,7 @@

${app} is available in the software repositories of the following - linux distributions: + supported linux distributions: diff --git a/src/calibre/web/feeds/recipes/__init__.py b/src/calibre/web/feeds/recipes/__init__.py index 48e5d9e720..4d2adfb1c0 100644 --- a/src/calibre/web/feeds/recipes/__init__.py +++ b/src/calibre/web/feeds/recipes/__init__.py @@ -42,7 +42,7 @@ recipe_modules = ['recipe_' + r for r in ( 'moneynews', 'der_standard', 'diepresse', 'nzz_ger', 'hna', 'seattle_times', 'scott_hanselman', 'coding_horror', 'twitchfilms', 'stackoverflow', 'telepolis_artikel', 'zaobao', 'usnews', - 'straitstimes', + 'straitstimes', 'index_hu', 'pcworld_hu', )] import re, imp, inspect, time, os diff --git a/src/calibre/web/feeds/recipes/recipe_blic.py b/src/calibre/web/feeds/recipes/recipe_blic.py index e4e4987dec..e212e73218 100644 --- a/src/calibre/web/feeds/recipes/recipe_blic.py +++ b/src/calibre/web/feeds/recipes/recipe_blic.py @@ -16,12 +16,14 @@ class Blic(BasicNewsRecipe): description = 'Blic.co.yu online verzija najtiraznije novine u Srbiji donosi najnovije vesti iz Srbije i sveta, komentare, politicke analize, poslovne i ekonomske vesti, vesti iz regiona, intervjue, informacije iz kulture, reportaze, pokriva sve sportske dogadjaje, detaljan tv program, nagradne igre, zabavu, fenomenalni Blic strip, dnevni horoskop, arhivu svih dogadjaja' publisher = 'RINGIER d.o.o.' category = 'news, politics, Serbia' + delay = 1 oldest_article = 2 max_articles_per_feed = 100 remove_javascript = True no_stylesheets = True use_embedded_content = False language = _('Serbian') + lang = 'sr-Latn-RS' extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif} ' html2lrf_options = [ @@ -45,26 +47,14 @@ class Blic(BasicNewsRecipe): start_url, question, rest_url = url.partition('?') return u'http://www.blic.rs/_print.php?' + rest_url - def cleanup_image_tags(self,soup): - for item in soup.findAll('img'): - for attrib in ['height','width','border','align']: - if item.has_key(attrib): - del item[attrib] - oldParent = item.parent - myIndex = oldParent.contents.index(item) - item.extract() - divtag = Tag(soup,'div') - brtag = Tag(soup,'br') - oldParent.insert(myIndex,divtag) - divtag.append(item) - divtag.append(brtag) - return soup - - def preprocess_html(self, soup): - mtag = '' - soup.head.insert(0,mtag) + mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)]) + soup.head.insert(0,mlang) for item in soup.findAll(style=True): del item['style'] - return self.cleanup_image_tags(soup) + return self.adeify_images(soup) + + def get_article_url(self, article): + raw = article.get('link', None) + return raw.replace('.co.yu','.rs') \ No newline at end of file diff --git a/src/calibre/web/feeds/recipes/recipe_index_hu.py b/src/calibre/web/feeds/recipes/recipe_index_hu.py new file mode 100644 index 0000000000..8b36500e5c --- /dev/null +++ b/src/calibre/web/feeds/recipes/recipe_index_hu.py @@ -0,0 +1,20 @@ +from calibre.web.feeds.news import BasicNewsRecipe + +class Index(BasicNewsRecipe): + + title = u'INDEX.HU' + oldest_article = 3 + max_articles_per_feed = 50 + language = _('Hungarian') + __author__ = 'Ezmegaz' + + feeds = [(u'ALL', u'http://index.hu/24ora/rss/'), + (u'BELF\xd6LD', u'http://index.hu/belfold/rss/default/'), + (u'K\xdcLF\xd6LD', u'http://index.hu/kulfold/rss/default/'), + (u'BULV\xc1R', u'http://index.hu/bulvar/rss/default/'), + (u'GAZDAS\xc1G', u'http://index.hu/gazdasag/rss/default/'), + (u'TECH', u'http://index.hu/tech/rss/main/'), + (u'KULT\xdaRA', u'http://index.hu/kultur/rss/main/'), + (u'TUDOM\xc1NY', u'http://index.hu/tudomany/rss/main/'), + (u'V\xc9LEM\xc9NY', u'http://index.hu/velemeny/rss/default/')] + diff --git a/src/calibre/web/feeds/recipes/recipe_nin.py b/src/calibre/web/feeds/recipes/recipe_nin.py index fe1e97e8b8..4de53a1049 100644 --- a/src/calibre/web/feeds/recipes/recipe_nin.py +++ b/src/calibre/web/feeds/recipes/recipe_nin.py @@ -8,12 +8,13 @@ nin.co.rs import re, urllib from calibre.web.feeds.news import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag class Nin(BasicNewsRecipe): title = 'NIN online' __author__ = 'Darko Miletic' description = 'Nedeljne informativne novine' - publisher = 'NIN' + publisher = 'NIN D.O.O.' category = 'news, politics, Serbia' no_stylesheets = True oldest_article = 15 @@ -28,9 +29,9 @@ class Nin(BasicNewsRecipe): remove_javascript = True use_embedded_content = False language = _('Serbian') - lang = 'sr-RS' + lang = 'sr-Latn-RS' direction = 'ltr' - extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}' + extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif} .artTitle{font-size: x-large; font-weight: bold} .columnhead{font-size: small; font-weight: bold}' html2lrf_options = [ '--comment' , description @@ -70,9 +71,10 @@ class Nin(BasicNewsRecipe): def preprocess_html(self, soup): soup.html['lang'] = self.lang soup.html['dir' ] = self.direction - mtag = '' - mtag += '\n' - soup.head.insert(0,mtag) + mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)]) + mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")]) + soup.head.insert(0,mlang) + soup.head.insert(1,mcharset) for item in soup.findAll(style=True): del item['style'] return soup diff --git a/src/calibre/web/feeds/recipes/recipe_pcworld_hu.py b/src/calibre/web/feeds/recipes/recipe_pcworld_hu.py new file mode 100644 index 0000000000..ad1f1df72a --- /dev/null +++ b/src/calibre/web/feeds/recipes/recipe_pcworld_hu.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +from __future__ import with_statement + +__license__ = 'GPL v3' +__copyright__ = '2009, Kovid Goyal ' +__docformat__ = 'restructuredtext en' + +from calibre.web.feeds.news import BasicNewsRecipe + +class Index(BasicNewsRecipe): + + + title = u'PCWORLD.HU' + oldest_article = 3 + max_articles_per_feed = 50 + language = _('Hungarian') + __author__ = 'Ezmegaz' + + + feeds = [(u'H\xedrek', u'http://pcworld.hu/rss/rss.xml'), (u'Hardver h\xedrek', u'http://www.pcworld.hu/rss/rss_hardverhirek.xml'), (u'Szoftver h\xedrek', u'http://www.pcworld.hu/rss/rss_szoftverhirek.xml'), (u'Hardver cikkek', u'http://www.pcworld.hu/rss/rss_hardvercikkek.xml'), (u'Szoftver cikkek', u'http://www.pcworld.hu/rss/rss_szoftvercikkek.xml'), (u'Mobil h\xedrek', u'http://www.pcworld.hu/rss/rss_mobil.xml'), (u'\xdczleti h\xedrek', u'http://www.pcworld.hu/rss/rss_uzlet.xml'), (u'Let\xf6lt\xe9sek', u'http://www.pcworld.hu/rss/rss_letoltes.xml'), (u'PC World TV', u'http://tv.pcworld.hu/rss/rss_hun_pcw.xml'), (u'Tudta-e...?', u'http://pcworld.hu/rss/rss_tudtae.xml')] + diff --git a/src/calibre/web/feeds/recipes/recipe_pobjeda.py b/src/calibre/web/feeds/recipes/recipe_pobjeda.py index 5afb2b3f6a..6078e6ba0a 100644 --- a/src/calibre/web/feeds/recipes/recipe_pobjeda.py +++ b/src/calibre/web/feeds/recipes/recipe_pobjeda.py @@ -10,6 +10,7 @@ pobjeda.co.me import re from calibre import strftime from calibre.web.feeds.news import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag class Pobjeda(BasicNewsRecipe): title = 'Pobjeda Online' @@ -22,12 +23,13 @@ class Pobjeda(BasicNewsRecipe): encoding = 'utf8' remove_javascript = True use_embedded_content = False + language = _('Serbian') + lang = 'sr-Latn-Me' INDEX = u'http://www.pobjeda.co.me' - extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: serif1, serif}' + extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}' html2lrf_options = [ '--comment', description - , '--base-font-size', '10' , '--category', category , '--publisher', publisher ] @@ -59,11 +61,13 @@ class Pobjeda(BasicNewsRecipe): ] def preprocess_html(self, soup): - soup.html['xml:lang'] = 'sr-Latn-ME' - soup.html['lang'] = 'sr-Latn-ME' - mtag = '' - soup.head.insert(0,mtag) - return soup + soup.html['xml:lang'] = self.lang + soup.html['lang'] = self.lang + mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)]) + mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")]) + soup.head.insert(0,mlang) + soup.head.insert(1,mcharset) + return self.adeify_images(soup) def get_cover_url(self): cover_url = None diff --git a/src/calibre/web/feeds/recipes/recipe_st_petersburg_times.py b/src/calibre/web/feeds/recipes/recipe_st_petersburg_times.py index 8c22262904..cc023448c7 100644 --- a/src/calibre/web/feeds/recipes/recipe_st_petersburg_times.py +++ b/src/calibre/web/feeds/recipes/recipe_st_petersburg_times.py @@ -1,39 +1,48 @@ -#!/usr/bin/env python - -__license__ = 'GPL v3' -__copyright__ = '2008, Darko Miletic ' -''' -sptimes.ru -''' - -from calibre import strftime -from calibre.web.feeds.news import BasicNewsRecipe - -class PetersburgTimes(BasicNewsRecipe): - title = u'The St. Petersburg Times' - __author__ = 'Darko Miletic' - description = 'News from Russia' - oldest_article = 7 - max_articles_per_feed = 100 - no_stylesheets = True - use_embedded_content = False - language = _('English') - INDEX = 'http://www.sptimes.ru' - - def parse_index(self): - articles = [] - soup = self.index_to_soup(self.INDEX) - - for item in soup.findAll('a', attrs={'class':'story_link_o'}): - if item.has_key('href'): - url = self.INDEX + item['href'].replace('action_id=2','action_id=100') - title = self.tag_to_string(item) - c_date = strftime('%A, %d %B, %Y') - description = '' - articles.append({ - 'title':title, - 'date':c_date, - 'url':url, - 'description':description - }) - return [(soup.head.title.string, articles)] +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = '2009, Darko Miletic ' + +''' +sptimes.ru +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class PetersburgTimes(BasicNewsRecipe): + title = 'The St. Petersburg Times' + __author__ = 'Darko Miletic' + description = 'News from Russia' + publisher = 'sptimes.ru' + category = 'news, politics, Russia' + max_articles_per_feed = 100 + no_stylesheets = True + remove_javascript = True + encoding = 'cp1251' + use_embedded_content = False + language = _('English') + + html2lrf_options = [ + '--comment', description + , '--category', category + , '--publisher', publisher + , '--ignore-tables' + ] + + html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True' + + remove_tags = [dict(name=['object','link','embed'])] + + feeds = [(u'Headlines', u'http://sptimes.ru/headlines.php' )] + + def preprocess_html(self, soup): + return self.adeify_images(soup) + + def get_article_url(self, article): + raw = article.get('guid', None) + return raw + + def print_version(self, url): + start_url, question, article_id = url.rpartition('/') + return u'http://www.sptimes.ru/index.php?action_id=100&story_id=' + article_id + \ No newline at end of file diff --git a/src/calibre/web/feeds/recipes/recipe_vijesti.py b/src/calibre/web/feeds/recipes/recipe_vijesti.py index 9923193d7b..9ef32e636c 100644 --- a/src/calibre/web/feeds/recipes/recipe_vijesti.py +++ b/src/calibre/web/feeds/recipes/recipe_vijesti.py @@ -9,6 +9,7 @@ vijesti.me import re from calibre.web.feeds.news import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag class Vijesti(BasicNewsRecipe): title = 'Vijesti' @@ -16,8 +17,8 @@ class Vijesti(BasicNewsRecipe): description = 'News from Montenegro' publisher = 'Daily Press Vijesti' category = 'news, politics, Montenegro' - oldest_article = 1 - max_articles_per_feed = 100 + oldest_article = 2 + max_articles_per_feed = 150 no_stylesheets = True remove_javascript = True encoding = 'cp1250' @@ -25,7 +26,8 @@ class Vijesti(BasicNewsRecipe): remove_javascript = True use_embedded_content = False language = _('Serbian') - extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}' + lang ='sr-Latn-Me' + extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}' html2lrf_options = [ '--comment', description @@ -44,12 +46,15 @@ class Vijesti(BasicNewsRecipe): feeds = [(u'Sve vijesti', u'http://www.vijesti.me/rss.php' )] def preprocess_html(self, soup): - soup.html['xml:lang'] = 'sr-Latn-ME' - soup.html['lang'] = 'sr-Latn-ME' - mtag = '' - soup.head.insert(0,mtag) - for item in soup.findAll('img'): - if item.has_key('align'): - del item['align'] - item.insert(0,'

') - return soup + soup.html['xml:lang'] = self.lang + soup.html['lang'] = self.lang + mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)]) + mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")]) + soup.head.insert(0,mlang) + soup.head.insert(1,mcharset) + return self.adeify_images(soup) + + def get_article_url(self, article): + raw = article.get('link', None) + return raw.replace('.cg.yu','.me') + \ No newline at end of file diff --git a/src/calibre/web/feeds/recipes/recipe_vreme.py b/src/calibre/web/feeds/recipes/recipe_vreme.py index 1df953cae3..bcc7a14407 100644 --- a/src/calibre/web/feeds/recipes/recipe_vreme.py +++ b/src/calibre/web/feeds/recipes/recipe_vreme.py @@ -9,6 +9,7 @@ vreme.com import re from calibre import strftime from calibre.web.feeds.news import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag class Vreme(BasicNewsRecipe): title = 'Vreme' @@ -27,7 +28,7 @@ class Vreme(BasicNewsRecipe): language = _('Serbian') lang = 'sr-Latn-RS' direction = 'ltr' - extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: serif1, serif}' + extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif} .heading1{font-size: x-large; font-weight: bold} .heading2{font-size: large; font-weight: bold} .toc-heading{font-size: small}' html2lrf_options = [ '--comment' , description @@ -89,9 +90,10 @@ class Vreme(BasicNewsRecipe): del item['size'] soup.html['lang'] = self.lang soup.html['dir' ] = self.direction - mtag = '' - mtag += '\n' - soup.head.insert(0,mtag) + mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)]) + mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")]) + soup.head.insert(0,mlang) + soup.head.insert(1,mcharset) return soup def get_cover_url(self):