National Geographic (PL) by Marcin Urban, Caros Amigos by Pablo Aldama, Aksiyon Dergisi by thomass

This commit is contained in:
Kovid Goyal 2011-07-28 13:18:53 -06:00
parent 14a8da9c4d
commit c3a8776229
6 changed files with 189 additions and 3 deletions

View File

@ -0,0 +1,53 @@
# -*- coding: utf-8 -*-
from calibre.web.feeds.news import BasicNewsRecipe
class Aksiyon(BasicNewsRecipe):
    """Calibre news recipe for 'Aksiyon Dergisi' (Turkish, ``language = 'tr'``).

    Articles are collected from the per-section RSS feeds listed in ``feeds``;
    every article URL is rewritten by :meth:`print_version` so that the
    ``mobile_detailn.action`` page is fetched instead of the regular
    ``newsDetail_getNewsById.action`` page.
    """

    title = u'Aksiyon Dergisi'
    __author__ = u'thomass'
    description = 'Haftalık haber dergisi '
    oldest_article = 13
    max_articles_per_feed = 100
    no_stylesheets = True
    #delay = 1
    #use_embedded_content = False
    encoding = 'utf-8'
    publisher = 'Aksiyon'
    category = 'news, haberler,TR,gazete'
    language = 'tr'
    publication_type = 'magazine'
    #extra_css = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
    #keep_only_tags = [dict(name='font', attrs={'class':['newsDetail','agenda2NewsSpot']}),dict(name='span', attrs={'class':['agenda2Title']}),dict(name='div', attrs={'id':['gallery']})]

    # Drop the <img> elements whose src matches one of these logo/header images.
    remove_tags = [
        dict(name='img', attrs={'src': [
            'http://medya.aksiyon.com.tr/aksiyon/images/logo/logo.bmp',
            '/aksiyon/images/template/green/baslik0.gif',
            'mobile/home.jpg',
        ]}),
    ]

    cover_img_url = 'http://www.aksiyon.com.tr/aksiyon/images/aksiyon/top-page/aksiyon_top_r2_c1.jpg'
    # BasicNewsRecipe takes the cover from ``cover_url``; ``cover_img_url`` is
    # not an attribute it reads, so on its own it never produced a cover. The
    # old name is kept (and aliased) for backward compatibility.
    cover_url = cover_img_url
    masthead_url = 'http://aksiyon.com.tr/aksiyon/images/aksiyon/top-page/aksiyon_top_r2_c1.jpg'

    remove_empty_feeds = True
    remove_attributes = ['width', 'height']

    # (section title, RSS URL) pairs; the sections differ only in sectionId.
    feeds = [
        (u'ANASAYFA', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=0'),
        (u'KARAKUTU', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=11'),
        (u'EKONOMİ', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=35'),
        (u'EKOANALİZ', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=284'),
        (u'YAZARLAR', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=17'),
        (u'KİTAPLIK', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=13'),
        (u'SİNEMA', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=14'),
        (u'ARKA PENCERE', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=27'),
        (u'DÜNYA', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=32'),
        (u'DOSYALAR', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=34'),
        (u'KÜLTÜR & SANAT', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=12'),
        (u'KAPAK', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=26'),
        (u'SPOR', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=38'),
        (u'BİLİŞİM - TEKNOLOJİ', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=39'),
        (u'3. BOYUT', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=172'),
        (u'HAYAT BİLGİSİ', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=283'),
        # NOTE(review): same sectionId (283) as 'HAYAT BİLGİSİ' above, so both
        # sections fetch the same feed. Looks like a copy/paste slip -- confirm
        # the correct sectionId for this section against the site.
        (u'İŞ DÜNYASI', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=283'),
    ]

    def print_version(self, url):
        """Return the mobile detail-page URL for an article URL.

        :param url: Article URL as found in the feed.
        :return: ``url`` with the ``newsDetail_getNewsById.action?load=detay&``
            prefix swapped for ``mobile_detailn.action?``; URLs that do not
            contain that prefix are returned unchanged.
        """
        return url.replace(
            'http://www.aksiyon.com.tr/aksiyon/newsDetail_getNewsById.action?load=detay&',
            'http://www.aksiyon.com.tr/aksiyon/mobile_detailn.action?')

View File

@ -0,0 +1,17 @@
__copyright__ = '2011, Pablo Aldama <pabloaldama at gmail.com>'
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1311839910(BasicNewsRecipe):
    """Calibre news recipe for the 'Caros Amigos' RSS feed (``pt_BR``)."""

    title = u'Caros Amigos'
    __author__ = 'Pablo Aldama'
    language = 'pt_BR'

    # Fetch limits: articles up to 20 days old, at most 100 per feed.
    oldest_article = 20
    max_articles_per_feed = 100

    feeds = [
        (u'Caros Amigos',
         u'http://carosamigos.terra.com.br/index/index.php?format=feed&type=rss'),
    ]

    # Only the <div class="blog"> and <div class="blogcontent"> containers
    # are kept from each downloaded page.
    keep_only_tags = [
        dict(name='div', attrs={'class': ['blog']}),
        dict(name='div', attrs={'class': ['blogcontent']}),
    ]

    # Strip the <div class="addtoany"> element from the kept content.
    remove_tags = [
        dict(name='div', attrs={'class': 'addtoany'}),
    ]

Binary file not shown.

After

Width:  |  Height:  |  Size: 894 B

View File

@ -0,0 +1,52 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = 'Marcin Urban 2011'
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
class recipeMagic(BasicNewsRecipe):
    """Calibre news recipe for 'National Geographic PL' (``language = 'pl'``).

    Articles come from a single RSS feed; :meth:`print_version` rewrites each
    article URL by replacing ``artykuly0Cpokaz`` with ``drukuj-artykul``.
    """

    # --- metadata -----------------------------------------------------------
    title = 'National Geographic PL'
    __author__ = 'Marcin Urban 2011'
    description = 'legenda wśród magazynów z historią sięgającą 120 lat'
    publisher = 'G+J Gruner+Jahr Polska'
    category = 'news, PL,'
    language = 'pl'
    publication_type = 'newsportal'
    cover_url = 'http://www.guj.pl/var/guj/storage/images/media/nasze_magazyny/national_geographic/logo/ng_logo/2606-1-pol-PL/ng_logo.jpg'

    # --- fetch settings -----------------------------------------------------
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    #delay = 1
    use_embedded_content = False
    encoding = 'utf8'

    # --- presentation -------------------------------------------------------
    extra_css = (
        ' body {font-family: verdana, arial, helvetica, geneva, sans-serif ;}\n'
        'h1{text-align: center;}\n'
        'h2{font-size: medium; font-weight: bold;}\n'
        '.authordate {font-size: small; color: #696969;}\n'
        'p.lead {font-weight: bold; text-align: center;}\n'
        '.fot{font-size: x-small; color: #666666;} '
    )

    # Remove HTML comments from the raw page (DOTALL so multi-line comments
    # are matched too).
    preprocess_regexps = [
        (re.compile(r'<!--.*?-->', re.DOTALL), lambda m: ''),
    ]

    # Built from the metadata attributes defined above, so it must come
    # after them in the class body.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
        'linearize_tables': True,
    }

    remove_tags = [
        dict(name='div', attrs={'class': 'add_inf'}),
        dict(name='div', attrs={'class': 'add_f'}),
    ]
    remove_attributes = ['width', 'height']

    feeds = [
        ('National Geographic PL', 'http://www.national-geographic.pl/rss/'),
    ]

    def print_version(self, url):
        """Return ``url`` with ``artykuly0Cpokaz`` replaced by ``drukuj-artykul``.

        URLs that do not contain the marker are returned unchanged.
        """
        printable = url.replace('artykuly0Cpokaz', 'drukuj-artykul')
        return printable

View File

@ -0,0 +1,54 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os, subprocess, tempfile
from calibre.constants import iswindows
from calibre.customize.ui import plugin_for_output_format
from calibre.ptempfile import TemporaryDirectory
from calibre.ebooks.mobi.utils import detect_periodical
from calibre import CurrentDir
# Name of the kindlegen executable for the current platform.
if iswindows:
    exe = 'kindlegen.exe'
else:
    exe = 'kindlegen'
def refactor_opf(opf, is_periodical):
    """Placeholder hook for adjusting the OPF; currently does nothing.

    :param opf: OPF file name (ignored for now).
    :param is_periodical: Periodical flag (ignored for now).
    :return: ``None``.
    """
    return None
def refactor_guide(oeb):
    """Remove every entry from ``oeb.guide`` except ``'toc'`` and ``'start'``.

    :param oeb: Object whose ``guide`` attribute is iterable over its keys and
        provides a ``remove(key)`` method.
    """
    wanted = ('toc', 'start')
    # Collect the doomed keys first so the guide is not mutated while it is
    # being iterated.
    unwanted = [ref for ref in oeb.guide if ref not in wanted]
    for ref in unwanted:
        oeb.guide.remove(ref)
def run_kindlegen(opf, log):
    """Run the external kindlegen program on an OPF file.

    :param opf: Path of the OPF file. The output name is derived from it by
        replacing the extension with ``.mobi``.
    :param log: Logger object providing ``info`` and ``debug`` methods.
    :return: Path of the generated ``.mobi`` file.
    :raises RuntimeError: If, after kindlegen exits, the output file does not
        exist or is smaller than 100 bytes.
    """
    log.info('Running kindlegen on MOBIML created by calibre')
    oname = os.path.splitext(opf)[0] + '.mobi'
    # The suffix has to be passed by keyword: the first positional parameter
    # of NamedTemporaryFile is the file *mode*, so passing the suffix
    # positionally makes the call fail with an invalid-mode error.
    with tempfile.NamedTemporaryFile(suffix='_kindlegen_output.txt') as tfile:
        # stdout and stderr are both captured in the temporary file so the
        # full kindlegen output can be logged once the process has finished.
        p = subprocess.Popen([exe, opf, '-c1', '-verbose', '-o', oname],
                stderr=subprocess.STDOUT, stdout=tfile)
        returncode = p.wait()
        tfile.seek(0)
        log.debug('kindlegen verbose output:')
        log.debug(tfile.read())
        log.info('kindlegen returned returncode: %d'%returncode)
    # Success is judged by the output file, not by the return code.
    if not os.path.exists(oname) or os.stat(oname).st_size < 100:
        raise RuntimeError('kindlegen did not produce any output. '
                'kindlegen return code: %d'%returncode)
    return oname
def kindlegen(oeb, opts, input_plugin, output_path):
    """Convert ``oeb`` to MOBI by running kindlegen on calibre's OEB output.

    The book is first written to a temporary directory with the ``oeb``
    output plugin; kindlegen is then run on the resulting OPF and the
    generated file is copied to ``output_path``.

    :param oeb: The book; its ``toc``, ``log`` and ``guide`` attributes are
        used here.
    :param opts: Conversion options, passed through to the OEB output plugin.
    :param input_plugin: Input plugin, passed through to the OEB output plugin.
    :param output_path: Destination path of the MOBI file.
        NOTE(review): assumed to be a filesystem path, not a stream -- confirm
        against the caller.
    :raises RuntimeError: Propagated from ``run_kindlegen`` when kindlegen
        produces no usable output.
    """
    import shutil

    is_periodical = detect_periodical(oeb.toc, oeb.log)
    refactor_guide(oeb)
    # Resolve the destination before the working directory is changed below.
    output_path = os.path.abspath(output_path)
    with TemporaryDirectory('_epub_output') as tdir:
        oeb_output = plugin_for_output_format('oeb')
        oeb_output.convert(oeb, tdir, input_plugin, opts, oeb.log)
        opf = [x for x in os.listdir(tdir) if x.endswith('.opf')][0]
        with CurrentDir(tdir):
            # ``opf`` is a bare file name relative to ``tdir``, so everything
            # that touches it runs with ``tdir`` as the working directory.
            refactor_opf(opf, is_periodical)
            oname = run_kindlegen(opf, oeb.log)
            # Copy the result out before the temporary directory is removed;
            # otherwise the generated file would be lost.
            shutil.copyfile(oname, output_path)

View File

@ -50,6 +50,12 @@ class MOBIOutput(OutputFormatPlugin):
help=_('When adding the Table of Contents to the book, add it at the start of the '
'book instead of the end. Not recommended.')
),
OptionRecommendation(name='kindlegen',
recommended_value=False,
help=_('Use kindlegen (must be in your PATH) to generate the'
' binary wrapper for the MOBI format. Useful to debug '
' the calibre MOBI output.')
),
])
@ -164,6 +170,10 @@ class MOBIOutput(OutputFormatPlugin):
MobiWriter
else:
from calibre.ebooks.mobi.writer import MobiWriter
if opts.kindlegen:
from calibre.ebooks.mobi.kindlegen import kindlegen
kindlegen(oeb, opts, input_plugin, output_path)
else:
writer = MobiWriter(opts,
write_page_breaks_after_item=write_page_breaks_after_item)
writer(oeb, output_path)