Merge from trunk
commit d9c9accdda

resources/recipes/20_minutos.recipe (new file, 17 lines)
from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1295310874(BasicNewsRecipe):
    title = u'20 Minutos (Boletin)'
    __author__ = 'Luis Hernandez'
    description = 'Periódico gratuito en español'
    cover_url = 'http://estaticos.20minutos.es/mmedia/especiales/corporativo/css/img/logotipos_grupo20minutos.gif'
    language = 'es'

    oldest_article = 2
    max_articles_per_feed = 50

    feeds = [
        (u'VESPERTINO', u'http://20minutos.feedsportal.com/c/32489/f/478284/index.rss'),
        (u'DEPORTES', u'http://20minutos.feedsportal.com/c/32489/f/478286/index.rss'),
        (u'CULTURA', u'http://www.20minutos.es/rss/ocio/'),
        (u'TV', u'http://20minutos.feedsportal.com/c/32489/f/490877/index.rss'),
    ]
resources/recipes/abc.recipe (new file, 43 lines)

import re
from calibre.web.feeds.news import BasicNewsRecipe

class ABCRecipe(BasicNewsRecipe):
    title = u'ABC Linuxu'
    oldest_article = 5
    max_articles_per_feed = 3 #5
    __author__ = 'Funthomas'
    language = 'cs'

    feeds = [
        #(u'Blogy', u'http://www.abclinuxu.cz/auto/blogDigest.rss'),
        (u'Články', u'http://www.abclinuxu.cz/auto/abc.rss'),
        (u'Zprávičky', u'http://www.abclinuxu.cz/auto/zpravicky.rss')
    ]

    remove_javascript = True
    no_stylesheets = True
    remove_attributes = ['width','height']

    remove_tags_before = dict(name='h1')
    remove_tags = [
        dict(attrs={'class':['meta-vypis','page_tools','cl_perex']}),
        dict(attrs={'class':['cl_nadpis-link','komix-nav']})
    ]

    remove_tags_after = [
        dict(name='div', attrs={'class':['cl_perex','komix-nav']}),
        dict(attrs={'class':['meta-vypis','page_tools']}),
        dict(name='', attrs={'':''}),
    ]

    preprocess_regexps = [
        (re.compile(r'</div>.*<p class="perex">', re.DOTALL), lambda match: '</div><p class="perex">')
    ]

    def print_version(self, url):
        return url + '?varianta=print&noDiz'

    extra_css = '''
        h1 {font-size:130%; font-weight:bold}
        h3 {font-size:111%; font-weight:bold}
        '''
resources/recipes/idnes.recipe (new file, 54 lines)

from calibre.web.feeds.recipes import BasicNewsRecipe

class iHeuteRecipe(BasicNewsRecipe):
    __author__ = 'FunThomas'
    title = u'iDnes.cz'
    publisher = u'MAFRA a.s.'
    description = 'iDNES.cz Zprávy, Technet, Komiksy a další'
    oldest_article = 3
    max_articles_per_feed = 2

    feeds = [
        (u'Zprávy', u'http://servis.idnes.cz/rss.asp?c=zpravodaj'),
        (u'Sport', u'http://servis.idnes.cz/rss.asp?c=sport'),
        (u'Technet', u'http://servis.idnes.cz/rss.asp?c=technet'),
        (u'Mobil', u'http://servis.idnes.cz/rss.asp?c=mobil'),
        (u'Ekonomika', u'http://servis.idnes.cz/rss.asp?c=ekonomikah'),
        #(u'Kultura', u'http://servis.idnes.cz/rss.asp?c=kultura'),
        (u'Cestování', u'http://servis.idnes.cz/rss.asp?c=iglobe'),
        #(u'Kavárna', u'http://servis.idnes.cz/rss.asp?r=kavarna'),
        (u'Komixy', u'http://servis.idnes.cz/rss.asp?c=komiksy')
    ]

    encoding = 'cp1250'
    language = 'cs'
    cover_url = 'http://g.idnes.cz/u/loga-n4/idnes.gif'
    remove_javascript = True
    no_stylesheets = True

    remove_attributes = ['width','height']
    remove_tags = [
        dict(name='div', attrs={'id':['zooming']}),
        dict(name='div', attrs={'class':['related','mapa-wrapper']}),
        dict(name='table', attrs={'id':['opener-img','portal']}),
        dict(name='table', attrs={'class':['video-16ku9']}),
    ]
    remove_tags_after = [dict(name='div', attrs={'id':['related','related2']})]

    keep_only_tags = [
        dict(name='div', attrs={'class':['art-full adwords-text','dil-day']}),
        dict(name='table', attrs={'class':['kemel-box']}),
    ]

    def print_version(self, url):
        print_url = url
        split_url = url.split("?")
        if split_url[0].rfind('dilbert.asp') != -1: #dilbert komix
            print_url = print_url.replace('.htm','.gif&tisk=1')
            print_url = print_url.replace('.asp','.aspx')
        elif split_url[0].rfind('kemel.asp') == -1: #not Kemel komix
            print_url = 'http://zpravy.idnes.cz/tiskni.asp?' + split_url[1]
            #kemel print page doesn't work
        return print_url

    extra_css = '''
        h1 {font-size:125%; font-weight:bold}
        h3 {font-size:110%; font-weight:bold}
        '''
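As a quick illustration of the print_version logic above (the article URL below is hypothetical, invented for illustration and not taken from the commit), an ordinary iDnes article link is rewritten to the tiskni.asp print page, while Dilbert comic pages get the .gif/tisk rewrite instead:

# Sketch only: exercising the URL rewriting from iHeuteRecipe.print_version
# with a made-up article link (the query string is hypothetical).
url = 'http://zpravy.idnes.cz/domaci.asp?c=A110119_example'
split_url = url.split("?")
if split_url[0].rfind('dilbert.asp') != -1:      # Dilbert comic page
    print_url = url.replace('.htm', '.gif&tisk=1').replace('.asp', '.aspx')
elif split_url[0].rfind('kemel.asp') == -1:      # ordinary article
    print_url = 'http://zpravy.idnes.cz/tiskni.asp?' + split_url[1]
else:                                            # Kemel comic: print page broken, keep as-is
    print_url = url
print(print_url)  # -> http://zpravy.idnes.cz/tiskni.asp?c=A110119_example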
resources/recipes/la_tribuna.recipe (new file, 29 lines)

from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1294946868(BasicNewsRecipe):
    title = u'La Tribuna de Talavera'
    __author__ = 'Luis Hernández'
    description = 'Diario de Talavera de la Reina'
    cover_url = 'http://www.latribunadetalavera.es/entorno/mancheta.gif'

    oldest_article = 5
    max_articles_per_feed = 50

    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False

    encoding = 'utf-8'
    language = 'es'
    timefmt = '[%a, %d %b, %Y]'

    keep_only_tags = [
        dict(name='div', attrs={'id':['articulo']}),
        dict(name='div', attrs={'class':['foto']}),
        dict(name='p', attrs={'id':['texto']}),
    ]

    remove_tags_before = dict(name='div', attrs={'class':['comparte']})
    remove_tags_after  = dict(name='div', attrs={'id':['relacionadas']})

    feeds = [(u'Portada', u'http://www.latribunadetalavera.es/rss.html')]
@@ -1,6 +1,5 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
-
 __license__ = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 '''
@@ -28,6 +27,10 @@ class NYTimes(BasicNewsRecipe):
     # previous paid versions of the new york times to best sent to the back issues folder on the kindle
     replaceKindleVersion = False

+    # download higher resolution images than the small thumbnails typically included in the article
+    # the down side of having large beautiful images is the file size is much larger, on the order of 7MB per paper
+    useHighResImages = True
+
     # includeSections: List of sections to include. If empty, all sections found will be included.
     # Otherwise, only the sections named will be included. For example,
     #
@@ -90,7 +93,6 @@ class NYTimes(BasicNewsRecipe):
                 (u'Sunday Magazine',u'magazine'),
                 (u'Week in Review',u'weekinreview')]

-
     if headlinesOnly:
         title='New York Times Headlines'
         description = 'Headlines from the New York Times'
@@ -127,7 +129,7 @@ class NYTimes(BasicNewsRecipe):

     earliest_date = date.today() - timedelta(days=oldest_article)

-    __author__ = 'GRiker/Kovid Goyal/Nick Redding'
+    __author__ = 'GRiker/Kovid Goyal/Nick Redding/Ben Collier'
     language = 'en'
     requires_version = (0, 7, 5)

@@ -149,7 +151,7 @@ class NYTimes(BasicNewsRecipe):
                     'dottedLine',
                     'entry-meta',
                     'entry-response module',
-                    'icon enlargeThis',
+                    #'icon enlargeThis', #removed to provide option for high res images
                     'leftNavTabs',
                     'metaFootnote',
                     'module box nav',
@@ -163,7 +165,23 @@ class NYTimes(BasicNewsRecipe):
                     'entry-tags', #added for DealBook
                     'footer promos clearfix', #added for DealBook
                     'footer links clearfix', #added for DealBook
-                    'inlineImage module', #added for DealBook
+                    'tabsContainer', #added for other blog downloads
+                    'column lastColumn', #added for other blog downloads
+                    'pageHeaderWithLabel', #added for other gadgetwise downloads
+                    'column two', #added for other blog downloads
+                    'column two last', #added for other blog downloads
+                    'column three', #added for other blog downloads
+                    'column three last', #added for other blog downloads
+                    'column four', #added for other blog downloads
+                    'column four last', #added for other blog downloads
+                    'column last', #added for other blog downloads
+                    'timestamp published', #added for other blog downloads
+                    'entry entry-related',
+                    'subNavigation tabContent active', #caucus blog navigation
+                    'columnGroup doubleRule',
+                    'mediaOverlay slideshow',
+                    'headlinesOnly multiline flush',
+                    'wideThumb',
                     re.compile('^subNavigation'),
                     re.compile('^leaderboard'),
                     re.compile('^module'),
@@ -254,7 +272,7 @@ class NYTimes(BasicNewsRecipe):
     def exclude_url(self,url):
         if not url.startswith("http"):
             return True
-        if not url.endswith(".html") and 'dealbook.nytimes.com' not in url: #added for DealBook
+        if not url.endswith(".html") and 'dealbook.nytimes.com' not in url and 'blogs.nytimes.com' not in url: #added for DealBook
             return True
         if 'nytimes.com' not in url:
             return True
@@ -480,7 +498,7 @@ class NYTimes(BasicNewsRecipe):
                 for lidiv in div.findAll('li'):
                     if not skipping:
                         self.handle_article(lidiv)

         self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
         return self.filter_ans(self.ans)

@@ -591,20 +609,85 @@ class NYTimes(BasicNewsRecipe):
                 if article_date < self.earliest_date:
                     self.log("Skipping article dated %s" % date_str)
                     return None

-        kicker_tag = soup.find(attrs={'class':'kicker'})
-        if kicker_tag: # remove Op_Ed author head shots
-            tagline = self.tag_to_string(kicker_tag)
-            if tagline=='Op-Ed Columnist':
-                img_div = soup.find('div','inlineImage module')
-                if img_div:
-                    img_div.extract()
+        #all articles are from today, no need to print the date on every page
+        try:
+            if not self.webEdition:
+                date_tag = soup.find(True,attrs={'class': ['dateline','date']})
+                if date_tag:
+                    date_tag.extract()
+        except:
+            self.log("Error removing the published date")
+
+        if self.useHighResImages:
+            try:
+                #open up all the "Enlarge this Image" pop-ups and download the full resolution jpegs
+                enlargeThisList = soup.findAll('div',{'class':'icon enlargeThis'})
+                if enlargeThisList:
+                    for popupref in enlargeThisList:
+                        popupreflink = popupref.find('a')
+                        if popupreflink:
+                            reflinkstring = str(popupreflink['href'])
+                            refstart = reflinkstring.find("javascript:pop_me_up2('") + len("javascript:pop_me_up2('")
+                            refend = reflinkstring.find(".html", refstart) + len(".html")
+                            reflinkstring = reflinkstring[refstart:refend]
+
+                            popuppage = self.browser.open(reflinkstring)
+                            popuphtml = popuppage.read()
+                            popuppage.close()
+                            if popuphtml:
+                                st = time.localtime()
+                                year = str(st.tm_year)
+                                month = "%.2d" % st.tm_mon
+                                day = "%.2d" % st.tm_mday
+                                imgstartpos = popuphtml.find('http://graphics8.nytimes.com/images/' + year + '/' + month +'/' + day +'/') + len('http://graphics8.nytimes.com/images/' + year + '/' + month +'/' + day +'/')
+                                highResImageLink = 'http://graphics8.nytimes.com/images/' + year + '/' + month +'/' + day +'/' + popuphtml[imgstartpos:popuphtml.find('.jpg',imgstartpos)+4]
+                                popupSoup = BeautifulSoup(popuphtml)
+                                highResTag = popupSoup.find('img', {'src':highResImageLink})
+                                if highResTag:
+                                    try:
+                                        newWidth = highResTag['width']
+                                        newHeight = highResTag['height']
+                                        imageTag = popupref.parent.find("img")
+                                    except:
+                                        self.log("Error: finding width and height of img")
+                                    popupref.extract()
+                                    if imageTag:
+                                        try:
+                                            imageTag['src'] = highResImageLink
+                                            imageTag['width'] = newWidth
+                                            imageTag['height'] = newHeight
+                                        except:
+                                            self.log("Error setting the src width and height parameters")
+            except Exception as e:
+                self.log("Error pulling high resolution images")
+
+        try:
+            #remove "Related content" bar
+            runAroundsFound = soup.findAll('div',{'class':['articleInline runaroundLeft','articleInline doubleRule runaroundLeft','articleInline runaroundLeft firstArticleInline']})
+            if runAroundsFound:
+                for runAround in runAroundsFound:
+                    #find all section headers
+                    hlines = runAround.findAll(True ,{'class':['sectionHeader','sectionHeader flushBottom']})
+                    if hlines:
+                        for hline in hlines:
+                            hline.extract()
+        except:
+            self.log("Error removing related content bar")
+
+        try:
+            #in case pulling images failed, delete the enlarge this text
+            enlargeThisList = soup.findAll('div',{'class':'icon enlargeThis'})
+            if enlargeThisList:
+                for popupref in enlargeThisList:
+                    popupref.extract()
+        except:
+            self.log("Error removing Enlarge this text")

         return self.strip_anchors(soup)

     def postprocess_html(self,soup, True):

         try:
             if self.one_picture_per_article:
                 # Remove all images after first
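To make the string slicing in the hunk above concrete, here is a small sketch with a hypothetical "Enlarge this Image" href (the link is invented for illustration and not taken from the commit):

# Sketch only: how refstart/refend isolate the pop-up page URL from the
# javascript:pop_me_up2(...) link that wraps each "Enlarge this Image" icon.
reflinkstring = "javascript:pop_me_up2('http://www.nytimes.com/imagepages/2011/01/19/example.html','930_600')"
refstart = reflinkstring.find("javascript:pop_me_up2('") + len("javascript:pop_me_up2('")
refend = reflinkstring.find(".html", refstart) + len(".html")
print(reflinkstring[refstart:refend])
# -> http://www.nytimes.com/imagepages/2011/01/19/example.html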
@@ -766,6 +849,8 @@ class NYTimes(BasicNewsRecipe):
         try:
             if len(article.text_summary.strip()) == 0:
                 articlebodies = soup.findAll('div',attrs={'class':'articleBody'})
+                if not articlebodies: #added to account for blog formats
+                    articlebodies = soup.findAll('div', attrs={'class':'entry-content'}) #added to account for blog formats
                 if articlebodies:
                     for articlebody in articlebodies:
                         if articlebody:
@@ -774,13 +859,14 @@ class NYTimes(BasicNewsRecipe):
                             refparagraph = self.massageNCXText(self.tag_to_string(p,use_alt=False)).strip()
                             #account for blank paragraphs and short paragraphs by appending them to longer ones
                             if len(refparagraph) > 0:
-                                if len(refparagraph) > 70: #approximately one line of text
+                                if len(refparagraph) > 140: #approximately two lines of text
                                     article.summary = article.text_summary = shortparagraph + refparagraph
                                     return
                                 else:
                                     shortparagraph = refparagraph + " "
                                     if shortparagraph.strip().find(" ") == -1 and not shortparagraph.strip().endswith(":"):
                                         shortparagraph = shortparagraph + "- "

         except:
             self.log("Error creating article descriptions")
             return
resources/recipes/root.recipe (new file, 39 lines)

import re
from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1289939440(BasicNewsRecipe):
    __author__ = 'FunThomas'
    title = u'Root.cz'
    description = u'Zprávičky a články z Root.cz'
    publisher = u'Internet Info, s.r.o'
    oldest_article = 2 #max stari clanku ve dnech
    max_articles_per_feed = 50 #max pocet clanku na feed

    feeds = [
        (u'Články', u'http://www.root.cz/rss/clanky/'),
        (u'Zprávičky', u'http://www.root.cz/rss/zpravicky/')
    ]

    publication_type = u'magazine'
    language = u'cs'
    no_stylesheets = True
    remove_javascript = True
    cover_url = u'http://i.iinfo.cz/urs/logo-root-bila-oranzova-cerna-111089527143118.gif'

    remove_attributes = ['width','height','href'] #,'href'
    keep_only_tags = [
        dict(name='h1'),
        dict(name='a', attrs={'class':'author'}),
        dict(name='p', attrs={'class':'intro'}),
        dict(name='div', attrs={'class':'urs'})
    ]

    preprocess_regexps = [
        (re.compile(u'<p class="perex[^"]*">[^<]*<img[^>]*>', re.DOTALL), lambda match: '<p class="intro">'),
        (re.compile(u'<h3><a name="tucnak">Tričko tučňák.*</body>', re.DOTALL), lambda match: '<!--deleted-->')
    ]

    extra_css = '''
        h1 {font-size:130%; font-weight:bold}
        h3 {font-size:111%; font-weight:bold}
        '''
resources/recipes/sinfest.recipe (new file, 33 lines)

__license__ = 'GPL v3'
__copyright__ = '2010, Nadid <nadid.skywalker at gmail.com>'
'''
http://www.sinfest.net
'''

from calibre.web.feeds.news import BasicNewsRecipe

class SinfestBig(BasicNewsRecipe):
    title = 'Sinfest'
    __author__ = 'nadid'
    description = 'Sinfest'
    reverse_article_order = False
    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = True
    encoding = 'utf-8'
    publisher = 'Tatsuya Ishida/Museworks'
    category = 'comic'
    language = 'en'

    conversion_options = {
        'comments'  : description,
        'tags'      : category,
        'language'  : language,
        'publisher' : publisher,
    }

    feeds = [(u'SinFest', u'http://henrik.nyh.se/scrapers/sinfest.rss')]

    def get_article_url(self, article):
        return article.get('link')
src/calibre/ebooks/metadata/sources/__init__.py (new file, 9 lines)

#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai

__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
@@ -42,9 +42,15 @@ class SearchAndReplaceWidget(Widget, Ui_Form):
     def break_cycles(self):
         Widget.break_cycles(self)

-        self.opt_sr1_search.doc_update.disconnect()
-        self.opt_sr2_search.doc_update.disconnect()
-        self.opt_sr3_search.doc_update.disconnect()
+        def d(x):
+            try:
+                x.disconnect()
+            except:
+                pass
+
+        d(self.opt_sr1_search)
+        d(self.opt_sr2_search)
+        d(self.opt_sr3_search)
+
         self.opt_sr1_search.break_cycles()
         self.opt_sr2_search.break_cycles()
@@ -310,7 +310,9 @@ What formats does |app| read metadata from?

 Where are the book files stored?
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-When you first run |app|, it will ask you for a folder in which to store your books. Whenever you add a book to |app|, it will copy the book into that folder. Books in the folder are nicely arranged into sub-folders by Author and Title. Metadata about the books is stored in the file ``metadata.db`` (which is a sqlite database).
+When you first run |app|, it will ask you for a folder in which to store your books. Whenever you add a book to |app|, it will copy the book into that folder. Books in the folder are nicely arranged into sub-folders by Author and Title. Note that the contents of this folder are automatically managed by |app|; **do not** add any files/folders manually to this folder, as they may be automatically deleted. If you want to add a file associated with a particular book, use the top right area of the :guilabel:`Edit metadata` dialog to do so. Then, |app| will automatically put that file into the correct folder and move it around when the title/author changes.
+
+Metadata about the books is stored in the file ``metadata.db`` at the top level of the library folder. This file is a sqlite database. When backing up your library, make sure you copy the entire folder and all its sub-folders.

 Why doesn't |app| let me store books in my own directory structure?
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
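The new FAQ text above describes ``metadata.db`` as a sqlite database at the top of the library folder. As a minimal read-only sketch (not part of this commit; the library path and the ``books`` table layout are assumptions for illustration), it can be inspected with Python's sqlite3 module:

# Sketch only: listing titles from a calibre library's metadata.db.
# Assumes the default library location and the standard "books" table;
# work on a copy, since calibre manages this file itself.
import os, sqlite3

library_path = os.path.expanduser('~/Calibre Library')   # assumed location
db = sqlite3.connect(os.path.join(library_path, 'metadata.db'))
try:
    for title, author_sort in db.execute('SELECT title, author_sort FROM books'):
        print('%s (%s)' % (title, author_sort))
finally:
    db.close()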