From 5ff51fc3f9c97ce1403d4cf51cebd308e81dc5b6 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Tue, 12 May 2009 12:16:14 -0700
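Subject: [PATCH] New recipes for index.hu and pcworld.hu by Ezmegaz

Also updates the Blic, NIN, Pobjeda, St. Petersburg Times, Vijesti and
Vreme recipes, replaces pdftk with podofo in the download plugin's
dependency list, and tweaks the wording of the Linux download page.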
---
src/calibre/trac/plugins/download.py | 2 +-
src/calibre/trac/plugins/templates/linux.html | 2 +-
src/calibre/web/feeds/recipes/__init__.py | 2 +-
src/calibre/web/feeds/recipes/recipe_blic.py | 28 ++----
.../web/feeds/recipes/recipe_index_hu.py | 20 +++++
src/calibre/web/feeds/recipes/recipe_nin.py | 14 +--
.../web/feeds/recipes/recipe_pcworld_hu.py | 22 +++++
.../web/feeds/recipes/recipe_pobjeda.py | 18 ++--
.../recipes/recipe_st_petersburg_times.py | 87 ++++++++++---------
.../web/feeds/recipes/recipe_vijesti.py | 29 ++++---
src/calibre/web/feeds/recipes/recipe_vreme.py | 10 ++-
11 files changed, 144 insertions(+), 90 deletions(-)
create mode 100644 src/calibre/web/feeds/recipes/recipe_index_hu.py
create mode 100644 src/calibre/web/feeds/recipes/recipe_pcworld_hu.py
diff --git a/src/calibre/trac/plugins/download.py b/src/calibre/trac/plugins/download.py
index 03a6676e7b..dd25279071 100644
--- a/src/calibre/trac/plugins/download.py
+++ b/src/calibre/trac/plugins/download.py
@@ -20,7 +20,7 @@ DEPENDENCIES = [
('BeautifulSoup', '3.0.5', 'beautifulsoup', 'python-beautifulsoup', 'python-BeautifulSoup'),
('dnspython', '1.6.0', 'dnspython', 'dnspython', 'dnspython', 'dnspython'),
('poppler', '0.10.5', 'poppler', 'poppler', 'poppler', 'poppler'),
- ('pdftk', '1.12', 'pdftk', 'pdftk', 'pdftk', 'pdftk'),
+ ('podofo', '0.7', 'podofo', 'podofo', 'podofo', 'podofo'),
]
diff --git a/src/calibre/trac/plugins/templates/linux.html b/src/calibre/trac/plugins/templates/linux.html
index 96881aa108..ffaa1d8394 100644
--- a/src/calibre/trac/plugins/templates/linux.html
+++ b/src/calibre/trac/plugins/templates/linux.html
@@ -49,7 +49,7 @@
${app} is available in the software repositories of the following
- linux distributions:
+ supported linux distributions:
diff --git a/src/calibre/web/feeds/recipes/__init__.py b/src/calibre/web/feeds/recipes/__init__.py
index 48e5d9e720..4d2adfb1c0 100644
--- a/src/calibre/web/feeds/recipes/__init__.py
+++ b/src/calibre/web/feeds/recipes/__init__.py
@@ -42,7 +42,7 @@ recipe_modules = ['recipe_' + r for r in (
'moneynews', 'der_standard', 'diepresse', 'nzz_ger', 'hna',
'seattle_times', 'scott_hanselman', 'coding_horror', 'twitchfilms',
'stackoverflow', 'telepolis_artikel', 'zaobao', 'usnews',
- 'straitstimes',
+ 'straitstimes', 'index_hu', 'pcworld_hu',
)]
import re, imp, inspect, time, os
diff --git a/src/calibre/web/feeds/recipes/recipe_blic.py b/src/calibre/web/feeds/recipes/recipe_blic.py
index e4e4987dec..e212e73218 100644
--- a/src/calibre/web/feeds/recipes/recipe_blic.py
+++ b/src/calibre/web/feeds/recipes/recipe_blic.py
@@ -16,12 +16,14 @@ class Blic(BasicNewsRecipe):
description = 'Blic.co.yu online verzija najtiraznije novine u Srbiji donosi najnovije vesti iz Srbije i sveta, komentare, politicke analize, poslovne i ekonomske vesti, vesti iz regiona, intervjue, informacije iz kulture, reportaze, pokriva sve sportske dogadjaje, detaljan tv program, nagradne igre, zabavu, fenomenalni Blic strip, dnevni horoskop, arhivu svih dogadjaja'
publisher = 'RINGIER d.o.o.'
category = 'news, politics, Serbia'
+ delay = 1
oldest_article = 2
max_articles_per_feed = 100
remove_javascript = True
no_stylesheets = True
use_embedded_content = False
language = _('Serbian')
+ lang = 'sr-Latn-RS'
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif} '
html2lrf_options = [
@@ -45,26 +47,14 @@ class Blic(BasicNewsRecipe):
start_url, question, rest_url = url.partition('?')
return u'http://www.blic.rs/_print.php?' + rest_url
- def cleanup_image_tags(self,soup):
- for item in soup.findAll('img'):
- for attrib in ['height','width','border','align']:
- if item.has_key(attrib):
- del item[attrib]
- oldParent = item.parent
- myIndex = oldParent.contents.index(item)
- item.extract()
- divtag = Tag(soup,'div')
- brtag = Tag(soup,'br')
- oldParent.insert(myIndex,divtag)
- divtag.append(item)
- divtag.append(brtag)
- return soup
-
-
def preprocess_html(self, soup):
- mtag = ''
- soup.head.insert(0,mtag)
+ mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
+ soup.head.insert(0,mlang)
for item in soup.findAll(style=True):
del item['style']
- return self.cleanup_image_tags(soup)
+ return self.adeify_images(soup)
+
+ def get_article_url(self, article):
+ raw = article.get('link', None)
+ return raw.replace('.co.yu','.rs')
\ No newline at end of file
diff --git a/src/calibre/web/feeds/recipes/recipe_index_hu.py b/src/calibre/web/feeds/recipes/recipe_index_hu.py
new file mode 100644
index 0000000000..8b36500e5c
--- /dev/null
+++ b/src/calibre/web/feeds/recipes/recipe_index_hu.py
@@ -0,0 +1,20 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Index(BasicNewsRecipe):
+
+ title = u'INDEX.HU'
+ oldest_article = 3
+ max_articles_per_feed = 50
+ language = _('Hungarian')
+ __author__ = 'Ezmegaz'
+
+ feeds = [(u'ALL', u'http://index.hu/24ora/rss/'),
+ (u'BELF\xd6LD', u'http://index.hu/belfold/rss/default/'),
+ (u'K\xdcLF\xd6LD', u'http://index.hu/kulfold/rss/default/'),
+ (u'BULV\xc1R', u'http://index.hu/bulvar/rss/default/'),
+ (u'GAZDAS\xc1G', u'http://index.hu/gazdasag/rss/default/'),
+ (u'TECH', u'http://index.hu/tech/rss/main/'),
+ (u'KULT\xdaRA', u'http://index.hu/kultur/rss/main/'),
+ (u'TUDOM\xc1NY', u'http://index.hu/tudomany/rss/main/'),
+ (u'V\xc9LEM\xc9NY', u'http://index.hu/velemeny/rss/default/')]
+
diff --git a/src/calibre/web/feeds/recipes/recipe_nin.py b/src/calibre/web/feeds/recipes/recipe_nin.py
index fe1e97e8b8..4de53a1049 100644
--- a/src/calibre/web/feeds/recipes/recipe_nin.py
+++ b/src/calibre/web/feeds/recipes/recipe_nin.py
@@ -8,12 +8,13 @@ nin.co.rs
import re, urllib
from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class Nin(BasicNewsRecipe):
title = 'NIN online'
__author__ = 'Darko Miletic'
description = 'Nedeljne informativne novine'
- publisher = 'NIN'
+ publisher = 'NIN D.O.O.'
category = 'news, politics, Serbia'
no_stylesheets = True
oldest_article = 15
@@ -28,9 +29,9 @@ class Nin(BasicNewsRecipe):
remove_javascript = True
use_embedded_content = False
language = _('Serbian')
- lang = 'sr-RS'
+ lang = 'sr-Latn-RS'
direction = 'ltr'
- extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
+ extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif} .artTitle{font-size: x-large; font-weight: bold} .columnhead{font-size: small; font-weight: bold}'
html2lrf_options = [
'--comment' , description
@@ -70,9 +71,10 @@ class Nin(BasicNewsRecipe):
def preprocess_html(self, soup):
soup.html['lang'] = self.lang
soup.html['dir' ] = self.direction
- mtag = ''
- mtag += '\n'
- soup.head.insert(0,mtag)
+ mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
+ mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
+ soup.head.insert(0,mlang)
+ soup.head.insert(1,mcharset)
for item in soup.findAll(style=True):
del item['style']
return soup
diff --git a/src/calibre/web/feeds/recipes/recipe_pcworld_hu.py b/src/calibre/web/feeds/recipes/recipe_pcworld_hu.py
new file mode 100644
index 0000000000..ad1f1df72a
--- /dev/null
+++ b/src/calibre/web/feeds/recipes/recipe_pcworld_hu.py
@@ -0,0 +1,22 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import with_statement
+
+__license__ = 'GPL v3'
+__copyright__ = '2009, Kovid Goyal '
+__docformat__ = 'restructuredtext en'
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Index(BasicNewsRecipe):
+
+
+ title = u'PCWORLD.HU'
+ oldest_article = 3
+ max_articles_per_feed = 50
+ language = _('Hungarian')
+ __author__ = 'Ezmegaz'
+
+
+ feeds = [(u'H\xedrek', u'http://pcworld.hu/rss/rss.xml'), (u'Hardver h\xedrek', u'http://www.pcworld.hu/rss/rss_hardverhirek.xml'), (u'Szoftver h\xedrek', u'http://www.pcworld.hu/rss/rss_szoftverhirek.xml'), (u'Hardver cikkek', u'http://www.pcworld.hu/rss/rss_hardvercikkek.xml'), (u'Szoftver cikkek', u'http://www.pcworld.hu/rss/rss_szoftvercikkek.xml'), (u'Mobil h\xedrek', u'http://www.pcworld.hu/rss/rss_mobil.xml'), (u'\xdczleti h\xedrek', u'http://www.pcworld.hu/rss/rss_uzlet.xml'), (u'Let\xf6lt\xe9sek', u'http://www.pcworld.hu/rss/rss_letoltes.xml'), (u'PC World TV', u'http://tv.pcworld.hu/rss/rss_hun_pcw.xml'), (u'Tudta-e...?', u'http://pcworld.hu/rss/rss_tudtae.xml')]
+
diff --git a/src/calibre/web/feeds/recipes/recipe_pobjeda.py b/src/calibre/web/feeds/recipes/recipe_pobjeda.py
index 5afb2b3f6a..6078e6ba0a 100644
--- a/src/calibre/web/feeds/recipes/recipe_pobjeda.py
+++ b/src/calibre/web/feeds/recipes/recipe_pobjeda.py
@@ -10,6 +10,7 @@ pobjeda.co.me
import re
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class Pobjeda(BasicNewsRecipe):
title = 'Pobjeda Online'
@@ -22,12 +23,13 @@ class Pobjeda(BasicNewsRecipe):
encoding = 'utf8'
remove_javascript = True
use_embedded_content = False
+ language = _('Serbian')
+ lang = 'sr-Latn-Me'
INDEX = u'http://www.pobjeda.co.me'
- extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: serif1, serif}'
+ extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif}'
html2lrf_options = [
'--comment', description
- , '--base-font-size', '10'
, '--category', category
, '--publisher', publisher
]
@@ -59,11 +61,13 @@ class Pobjeda(BasicNewsRecipe):
]
def preprocess_html(self, soup):
- soup.html['xml:lang'] = 'sr-Latn-ME'
- soup.html['lang'] = 'sr-Latn-ME'
- mtag = ''
- soup.head.insert(0,mtag)
- return soup
+ soup.html['xml:lang'] = self.lang
+ soup.html['lang'] = self.lang
+ mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
+ mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
+ soup.head.insert(0,mlang)
+ soup.head.insert(1,mcharset)
+ return self.adeify_images(soup)
def get_cover_url(self):
cover_url = None
diff --git a/src/calibre/web/feeds/recipes/recipe_st_petersburg_times.py b/src/calibre/web/feeds/recipes/recipe_st_petersburg_times.py
index 8c22262904..cc023448c7 100644
--- a/src/calibre/web/feeds/recipes/recipe_st_petersburg_times.py
+++ b/src/calibre/web/feeds/recipes/recipe_st_petersburg_times.py
@@ -1,39 +1,48 @@
-#!/usr/bin/env python
-
-__license__ = 'GPL v3'
-__copyright__ = '2008, Darko Miletic '
-'''
-sptimes.ru
-'''
-
-from calibre import strftime
-from calibre.web.feeds.news import BasicNewsRecipe
-
-class PetersburgTimes(BasicNewsRecipe):
- title = u'The St. Petersburg Times'
- __author__ = 'Darko Miletic'
- description = 'News from Russia'
- oldest_article = 7
- max_articles_per_feed = 100
- no_stylesheets = True
- use_embedded_content = False
- language = _('English')
- INDEX = 'http://www.sptimes.ru'
-
- def parse_index(self):
- articles = []
- soup = self.index_to_soup(self.INDEX)
-
- for item in soup.findAll('a', attrs={'class':'story_link_o'}):
- if item.has_key('href'):
- url = self.INDEX + item['href'].replace('action_id=2','action_id=100')
- title = self.tag_to_string(item)
- c_date = strftime('%A, %d %B, %Y')
- description = ''
- articles.append({
- 'title':title,
- 'date':c_date,
- 'url':url,
- 'description':description
- })
- return [(soup.head.title.string, articles)]
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__copyright__ = '2009, Darko Miletic '
+
+'''
+sptimes.ru
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class PetersburgTimes(BasicNewsRecipe):
+ title = 'The St. Petersburg Times'
+ __author__ = 'Darko Miletic'
+ description = 'News from Russia'
+ publisher = 'sptimes.ru'
+ category = 'news, politics, Russia'
+ max_articles_per_feed = 100
+ no_stylesheets = True
+ remove_javascript = True
+ encoding = 'cp1251'
+ use_embedded_content = False
+ language = _('English')
+
+ html2lrf_options = [
+ '--comment', description
+ , '--category', category
+ , '--publisher', publisher
+ , '--ignore-tables'
+ ]
+
+ html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
+
+ remove_tags = [dict(name=['object','link','embed'])]
+
+ feeds = [(u'Headlines', u'http://sptimes.ru/headlines.php' )]
+
+ def preprocess_html(self, soup):
+ return self.adeify_images(soup)
+
+ def get_article_url(self, article):
+ raw = article.get('guid', None)
+ return raw
+
+ def print_version(self, url):
+ start_url, question, article_id = url.rpartition('/')
+ return u'http://www.sptimes.ru/index.php?action_id=100&story_id=' + article_id
+
\ No newline at end of file
diff --git a/src/calibre/web/feeds/recipes/recipe_vijesti.py b/src/calibre/web/feeds/recipes/recipe_vijesti.py
index 9923193d7b..9ef32e636c 100644
--- a/src/calibre/web/feeds/recipes/recipe_vijesti.py
+++ b/src/calibre/web/feeds/recipes/recipe_vijesti.py
@@ -9,6 +9,7 @@ vijesti.me
import re
from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class Vijesti(BasicNewsRecipe):
title = 'Vijesti'
@@ -16,8 +17,8 @@ class Vijesti(BasicNewsRecipe):
description = 'News from Montenegro'
publisher = 'Daily Press Vijesti'
category = 'news, politics, Montenegro'
- oldest_article = 1
- max_articles_per_feed = 100
+ oldest_article = 2
+ max_articles_per_feed = 150
no_stylesheets = True
remove_javascript = True
encoding = 'cp1250'
@@ -25,7 +26,8 @@ class Vijesti(BasicNewsRecipe):
remove_javascript = True
use_embedded_content = False
language = _('Serbian')
- extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
+ lang ='sr-Latn-Me'
+ extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
html2lrf_options = [
'--comment', description
@@ -44,12 +46,15 @@ class Vijesti(BasicNewsRecipe):
feeds = [(u'Sve vijesti', u'http://www.vijesti.me/rss.php' )]
def preprocess_html(self, soup):
- soup.html['xml:lang'] = 'sr-Latn-ME'
- soup.html['lang'] = 'sr-Latn-ME'
- mtag = ''
- soup.head.insert(0,mtag)
- for item in soup.findAll('img'):
- if item.has_key('align'):
- del item['align']
- item.insert(0,'<br />')
- return soup
+ soup.html['xml:lang'] = self.lang
+ soup.html['lang'] = self.lang
+ mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
+ mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
+ soup.head.insert(0,mlang)
+ soup.head.insert(1,mcharset)
+ return self.adeify_images(soup)
+
+ def get_article_url(self, article):
+ raw = article.get('link', None)
+ return raw.replace('.cg.yu','.me')
+
\ No newline at end of file
diff --git a/src/calibre/web/feeds/recipes/recipe_vreme.py b/src/calibre/web/feeds/recipes/recipe_vreme.py
index 1df953cae3..bcc7a14407 100644
--- a/src/calibre/web/feeds/recipes/recipe_vreme.py
+++ b/src/calibre/web/feeds/recipes/recipe_vreme.py
@@ -9,6 +9,7 @@ vreme.com
import re
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class Vreme(BasicNewsRecipe):
title = 'Vreme'
@@ -27,7 +28,7 @@ class Vreme(BasicNewsRecipe):
language = _('Serbian')
lang = 'sr-Latn-RS'
direction = 'ltr'
- extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{text-align: justify; font-family: serif1, serif} .article_description{font-family: serif1, serif}'
+ extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: serif1, serif} .heading1{font-size: x-large; font-weight: bold} .heading2{font-size: large; font-weight: bold} .toc-heading{font-size: small}'
html2lrf_options = [
'--comment' , description
@@ -89,9 +90,10 @@ class Vreme(BasicNewsRecipe):
del item['size']
soup.html['lang'] = self.lang
soup.html['dir' ] = self.direction
- mtag = ''
- mtag += '\n'
- soup.head.insert(0,mtag)
+ mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
+ mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=UTF-8")])
+ soup.head.insert(0,mlang)
+ soup.head.insert(1,mcharset)
return soup
def get_cover_url(self):