mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
GwR and KG revisions pre 6.35
This commit is contained in:
commit
636cfbb769
@ -119,6 +119,8 @@
|
|||||||
- title: stuff.co.nz
|
- title: stuff.co.nz
|
||||||
author: Krittika Goyal
|
author: Krittika Goyal
|
||||||
|
|
||||||
|
- title: Editor and Publisher
|
||||||
|
author: XanthanGum
|
||||||
|
|
||||||
improved recipes:
|
improved recipes:
|
||||||
- Physics Today
|
- Physics Today
|
||||||
@ -127,6 +129,7 @@
|
|||||||
- FTD
|
- FTD
|
||||||
- The National Post
|
- The National Post
|
||||||
- Blic
|
- Blic
|
||||||
|
- Ars Technica
|
||||||
|
|
||||||
|
|
||||||
- version: 0.6.34
|
- version: 0.6.34
|
||||||
|
@ -1,12 +1,12 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
'''
|
'''
|
||||||
arstechnica.com
|
arstechnica.com
|
||||||
'''
|
'''
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
||||||
|
|
||||||
class ArsTechnica2(BasicNewsRecipe):
|
class ArsTechnica2(BasicNewsRecipe):
|
||||||
title = u'Ars Technica'
|
title = u'Ars Technica'
|
||||||
@ -18,24 +18,24 @@ class ArsTechnica2(BasicNewsRecipe):
|
|||||||
oldest_article = 2
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
encoding = 'utf8'
|
encoding = 'utf-8'
|
||||||
remove_javascript = True
|
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
|
extra_css = ' body {font-family: sans-serif} .byline{font-weight: bold; line-height: 1em; font-size: 0.625em; text-decoration: none} '
|
||||||
|
|
||||||
extra_css = '''
|
conversion_options = {
|
||||||
.news-item-title{font-size: medium ;font-family:Arial,Helvetica,sans-serif; font-weight:bold;}
|
'comments' : description
|
||||||
.news-item-teaser{font-size: small ;font-family:Arial,Helvetica,sans-serif; font-weight:bold;}
|
,'tags' : category
|
||||||
.news-item-byline{font-size:xx-small; font-family:Arial,Helvetica,sans-serif;font-weight:normal;}
|
,'language' : language
|
||||||
.news-item-text{font-size:x-small;font-family:Arial,Helvetica,sans-serif;}
|
,'publisher' : publisher
|
||||||
.news-item-figure-caption-text{font-size:xx-small; font-family:Arial,Helvetica,sans-serif;font-weight:bold;}
|
}
|
||||||
.news-item-figure-caption-byline{font-size:xx-small; font-family:Arial,Helvetica,sans-serif;font-weight:normal;}
|
|
||||||
'''
|
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'id':['news-item-info','news-item']})]
|
|
||||||
|
|
||||||
|
keep_only_tags = [dict(name='div', attrs={'id':['story','etc-story']})]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name=['object','link','embed'])
|
dict(name=['object','link','embed'])
|
||||||
,dict(name='div', attrs={'class':'related-stories'})
|
,dict(name='div', attrs={'class':'read-more-link'})
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
@ -52,14 +52,19 @@ class ArsTechnica2(BasicNewsRecipe):
|
|||||||
]
|
]
|
||||||
|
|
||||||
def append_page(self, soup, appendtag, position):
|
def append_page(self, soup, appendtag, position):
|
||||||
pager = soup.find('div',attrs={'id':'pager'})
|
pager = soup.find('div',attrs={'class':'pager'})
|
||||||
if pager:
|
if pager:
|
||||||
for atag in pager.findAll('a',href=True):
|
for atag in pager.findAll('a',href=True):
|
||||||
str = self.tag_to_string(atag)
|
str = self.tag_to_string(atag)
|
||||||
if str.startswith('Next'):
|
if str.startswith('Next'):
|
||||||
soup2 = self.index_to_soup(atag['href'])
|
nurl = 'http://arstechnica.com' + atag['href']
|
||||||
|
rawc = self.index_to_soup(nurl,True)
|
||||||
|
soup2 = BeautifulSoup(rawc, fromEncoding=self.encoding)
|
||||||
|
|
||||||
texttag = soup2.find('div', attrs={'class':'news-item-text'})
|
readmoretag = soup2.find('div', attrs={'class':'read-more-link'})
|
||||||
|
if readmoretag:
|
||||||
|
readmoretag.extract()
|
||||||
|
texttag = soup2.find('div', attrs={'class':'body'})
|
||||||
for it in texttag.findAll(style=True):
|
for it in texttag.findAll(style=True):
|
||||||
del it['style']
|
del it['style']
|
||||||
|
|
||||||
@ -71,10 +76,12 @@ class ArsTechnica2(BasicNewsRecipe):
|
|||||||
|
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
|
ftag = soup.find('div', attrs={'class':'byline'})
|
||||||
ftag = soup.find('div', attrs={'class':'news-item-byline'})
|
|
||||||
if ftag:
|
if ftag:
|
||||||
ftag.insert(4,'<br /><br />')
|
brtag = Tag(soup,'br')
|
||||||
|
brtag2 = Tag(soup,'br')
|
||||||
|
ftag.insert(4,brtag)
|
||||||
|
ftag.insert(5,brtag2)
|
||||||
|
|
||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
del item['style']
|
del item['style']
|
||||||
@ -83,5 +90,7 @@ class ArsTechnica2(BasicNewsRecipe):
|
|||||||
|
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
|
def get_article_url(self, article):
|
||||||
|
return article.get('feedburner_origlink', None).rpartition('?')[0]
|
||||||
|
|
||||||
|
|
||||||
|
34
resources/recipes/editor_and_publisher.recipe
Normal file
34
resources/recipes/editor_and_publisher.recipe
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
import re
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
class EandP(BasicNewsRecipe):
|
||||||
|
title = u'Editor and Publisher'
|
||||||
|
__author__ = u'Xanthan Gum'
|
||||||
|
description = 'News about newspapers and journalism.'
|
||||||
|
language = 'en'
|
||||||
|
no_stylesheets = True
|
||||||
|
|
||||||
|
oldest_article = 7
|
||||||
|
max_articles_per_feed = 100
|
||||||
|
|
||||||
|
# Font formatting code borrowed from kwetal
|
||||||
|
|
||||||
|
extra_css = '''
|
||||||
|
body{font-family:verdana,arial,helvetica,geneva,sans-serif ;}
|
||||||
|
h1{font-size: xx-large;}
|
||||||
|
h2{font-size: large;}
|
||||||
|
'''
|
||||||
|
|
||||||
|
# Delete everything before the article
|
||||||
|
|
||||||
|
remove_tags_before = dict(name='font', attrs={'class':'titlebar_black'})
|
||||||
|
|
||||||
|
# Delete everything after the article
|
||||||
|
|
||||||
|
preprocess_regexps = [(re.compile(r'<!--endclickprintinclude-->.*</body>', re.DOTALL|re.IGNORECASE),
|
||||||
|
lambda match: '</body>'),]
|
||||||
|
|
||||||
|
feeds = [(u'Breaking News', u'http://feeds.feedburner.com/EditorAndPublisher-BreakingNews'),
|
||||||
|
(u'Business News', u'http://feeds.feedburner.com/EditorAndPublisher-BusinessNews'),
|
||||||
|
(u'Newsroom', u'http://feeds.feedburner.com/EditorAndPublisher-Newsroom'),
|
||||||
|
(u'Technology News', u'http://feeds.feedburner.com/EditorAndPublisher-Technology'),
|
||||||
|
(u'Syndicates News', u'http://feeds.feedburner.com/EditorAndPublisher-Syndicates')]
|
@ -34,23 +34,19 @@ class PluginWidget(QWidget,Ui_Form):
|
|||||||
def initialize(self, name):
|
def initialize(self, name):
|
||||||
self.name = name
|
self.name = name
|
||||||
# Restore options from last use here
|
# Restore options from last use here
|
||||||
print "gui2.catalog.catalog_epub_mobi:initialize(): Retrieving options"
|
|
||||||
for opt in self.OPTION_FIELDS:
|
for opt in self.OPTION_FIELDS:
|
||||||
opt_value = gprefs[self.name + '_' + opt[0]]
|
opt_value = gprefs.get(self.name + '_' + opt[0], opt[1])
|
||||||
print "Restoring %s: %s" % (self.name + '_' + opt[0], opt_value)
|
getattr(self, opt[0]).setText(opt_value)
|
||||||
setattr(self,opt[0], unicode(opt_value))
|
|
||||||
|
|
||||||
def options(self):
|
def options(self):
|
||||||
|
|
||||||
# Save/return the current options
|
# Save/return the current options
|
||||||
# getattr() returns text value of QLineEdit control
|
|
||||||
print "gui2.catalog.catalog_epub_mobi:options(): Saving options"
|
|
||||||
opts_dict = {}
|
opts_dict = {}
|
||||||
for opt in self.OPTION_FIELDS:
|
for opt in self.OPTION_FIELDS:
|
||||||
opt_value = unicode(getattr(self,opt[0]))
|
opt_value = unicode(getattr(self, opt[0]).text())
|
||||||
print "writing %s to gprefs" % opt_value
|
|
||||||
gprefs.set(self.name + '_' + opt[0], opt_value)
|
gprefs.set(self.name + '_' + opt[0], opt_value)
|
||||||
opts_dict[opt[0]] = opt_value.split(',')
|
if opt[0] == 'exclude_tags':
|
||||||
|
opt_value = opt_value.split(',')
|
||||||
|
opts_dict[opt[0]] = opt_value
|
||||||
|
|
||||||
opts_dict['output_profile'] = [load_defaults('page_setup')['output_profile']]
|
opts_dict['output_profile'] = [load_defaults('page_setup')['output_profile']]
|
||||||
|
|
||||||
|
@ -13,6 +13,7 @@
|
|||||||
<property name="windowTitle">
|
<property name="windowTitle">
|
||||||
<string>Form</string>
|
<string>Form</string>
|
||||||
</property>
|
</property>
|
||||||
|
<<<<<<< TREE
|
||||||
<widget class="QLabel" name="label">
|
<widget class="QLabel" name="label">
|
||||||
<property name="geometry">
|
<property name="geometry">
|
||||||
<rect>
|
<rect>
|
||||||
@ -158,6 +159,85 @@
|
|||||||
<set>Qt::AlignCenter</set>
|
<set>Qt::AlignCenter</set>
|
||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
|
=======
|
||||||
|
<layout class="QGridLayout" name="gridLayout">
|
||||||
|
<item row="0" column="0">
|
||||||
|
<widget class="QLabel" name="label">
|
||||||
|
<property name="text">
|
||||||
|
<string>Tags to exclude as genres (regex):</string>
|
||||||
|
</property>
|
||||||
|
<property name="textFormat">
|
||||||
|
<enum>Qt::LogText</enum>
|
||||||
|
</property>
|
||||||
|
<property name="wordWrap">
|
||||||
|
<bool>true</bool>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
|
</item>
|
||||||
|
<item row="1" column="0">
|
||||||
|
<widget class="QLabel" name="label_2">
|
||||||
|
<property name="text">
|
||||||
|
<string>'Don't include this book' tag:</string>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
|
</item>
|
||||||
|
<item row="1" column="1">
|
||||||
|
<widget class="QLineEdit" name="exclude_tags">
|
||||||
|
<property name="toolTip">
|
||||||
|
<string extracomment="Tooltip comment here"/>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
|
</item>
|
||||||
|
<item row="2" column="0">
|
||||||
|
<widget class="QLabel" name="label_3">
|
||||||
|
<property name="text">
|
||||||
|
<string>'Mark this book as read' tag:</string>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
|
</item>
|
||||||
|
<item row="2" column="1">
|
||||||
|
<widget class="QLineEdit" name="read_tag">
|
||||||
|
<property name="toolTip">
|
||||||
|
<string extracomment="Tooltip comment here"/>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
|
</item>
|
||||||
|
<item row="3" column="0">
|
||||||
|
<widget class="QLabel" name="label_4">
|
||||||
|
<property name="text">
|
||||||
|
<string>Additional note tag prefix:</string>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
|
</item>
|
||||||
|
<item row="3" column="1">
|
||||||
|
<widget class="QLineEdit" name="note_tag">
|
||||||
|
<property name="toolTip">
|
||||||
|
<string extracomment="Tooltip comment here"/>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
|
</item>
|
||||||
|
<item row="4" column="0">
|
||||||
|
<spacer name="verticalSpacer">
|
||||||
|
<property name="orientation">
|
||||||
|
<enum>Qt::Vertical</enum>
|
||||||
|
</property>
|
||||||
|
<property name="sizeHint" stdset="0">
|
||||||
|
<size>
|
||||||
|
<width>20</width>
|
||||||
|
<height>40</height>
|
||||||
|
</size>
|
||||||
|
</property>
|
||||||
|
</spacer>
|
||||||
|
</item>
|
||||||
|
<item row="0" column="1">
|
||||||
|
<widget class="QLineEdit" name="exclude_genre">
|
||||||
|
<property name="toolTip">
|
||||||
|
<string extracomment="Tooltip comment here"/>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
|
</item>
|
||||||
|
</layout>
|
||||||
|
>>>>>>> MERGE-SOURCE
|
||||||
</widget>
|
</widget>
|
||||||
<resources/>
|
<resources/>
|
||||||
<connections/>
|
<connections/>
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
import pickle, os, re, shutil
|
import pickle, os, re, shutil, htmlentitydefs
|
||||||
|
|
||||||
from xml.sax.saxutils import escape
|
from xml.sax.saxutils import escape
|
||||||
|
|
||||||
@ -291,7 +291,6 @@ class EPUB_MOBI(CatalogPlugin):
|
|||||||
# intToTranslate is a three-digit number
|
# intToTranslate is a three-digit number
|
||||||
|
|
||||||
tensComponentString = ""
|
tensComponentString = ""
|
||||||
hundredsComponenetString = ""
|
|
||||||
|
|
||||||
hundredsComponent = intToTranslate - (intToTranslate % 100)
|
hundredsComponent = intToTranslate - (intToTranslate % 100)
|
||||||
tensComponent = intToTranslate % 100
|
tensComponent = intToTranslate % 100
|
||||||
@ -1366,7 +1365,7 @@ class EPUB_MOBI(CatalogPlugin):
|
|||||||
# Check to see if cover is newer than thumbnail
|
# Check to see if cover is newer than thumbnail
|
||||||
# os.path.getmtime() = modified time
|
# os.path.getmtime() = modified time
|
||||||
# os.path.ctime() = creation time
|
# os.path.ctime() = creation time
|
||||||
cover_timestamp = os.path.getmtime(cover)
|
cover_timestamp = os.path.getmtime(title['cover'])
|
||||||
thumb_timestamp = os.path.getmtime(thumb_fp)
|
thumb_timestamp = os.path.getmtime(thumb_fp)
|
||||||
if thumb_timestamp < cover_timestamp:
|
if thumb_timestamp < cover_timestamp:
|
||||||
# if verbose: print "updating thumbnail for %s" % title['title']
|
# if verbose: print "updating thumbnail for %s" % title['title']
|
||||||
@ -1470,7 +1469,6 @@ class EPUB_MOBI(CatalogPlugin):
|
|||||||
spine = soup.find('spine')
|
spine = soup.find('spine')
|
||||||
stc = 0
|
stc = 0
|
||||||
guide = soup.find('guide')
|
guide = soup.find('guide')
|
||||||
gtc = 0
|
|
||||||
|
|
||||||
itemTag = Tag(soup, "item")
|
itemTag = Tag(soup, "item")
|
||||||
itemTag['id'] = "ncx"
|
itemTag['id'] = "ncx"
|
||||||
@ -1796,7 +1794,6 @@ class EPUB_MOBI(CatalogPlugin):
|
|||||||
print self.updateProgressFullStep("generateNCXByAuthor()")
|
print self.updateProgressFullStep("generateNCXByAuthor()")
|
||||||
|
|
||||||
soup = self.ncxSoup
|
soup = self.ncxSoup
|
||||||
output = "ByAlphaAuthor"
|
|
||||||
HTML_file = "content/ByAlphaAuthor.html"
|
HTML_file = "content/ByAlphaAuthor.html"
|
||||||
body = soup.find("navPoint")
|
body = soup.find("navPoint")
|
||||||
btc = len(body.contents)
|
btc = len(body.contents)
|
||||||
@ -2036,7 +2033,6 @@ class EPUB_MOBI(CatalogPlugin):
|
|||||||
content = node
|
content = node
|
||||||
# Special handling for '&' in 'cover'
|
# Special handling for '&' in 'cover'
|
||||||
if key == 'cover' and re.search('&',content):
|
if key == 'cover' and re.search('&',content):
|
||||||
hit = re.search('&',content)
|
|
||||||
content = re.sub('&','&',content)
|
content = re.sub('&','&',content)
|
||||||
|
|
||||||
if content:
|
if content:
|
||||||
|
@ -7652,7 +7652,7 @@ msgstr ""
|
|||||||
"Tilgjengelige områder: alle, forfattersortering, forfattere, kommentarer, "
|
"Tilgjengelige områder: alle, forfattersortering, forfattere, kommentarer, "
|
||||||
"omslagsbilde, formater, id, isbn, publikasjonsdato, bedømmelse, "
|
"omslagsbilde, formater, id, isbn, publikasjonsdato, bedømmelse, "
|
||||||
"serieinndeks, serier, størrelse, tidsmerker, tittel, uuid.\n"
|
"serieinndeks, serier, størrelse, tidsmerker, tittel, uuid.\n"
|
||||||
"Standard: %standard\n"
|
"Standard: %default\n"
|
||||||
"Gjelder: CSV, XML utdataformater"
|
"Gjelder: CSV, XML utdataformater"
|
||||||
|
|
||||||
#: /home/kovid/work/calibre/src/calibre/library/catalog.py:34
|
#: /home/kovid/work/calibre/src/calibre/library/catalog.py:34
|
||||||
@ -7665,7 +7665,7 @@ msgstr ""
|
|||||||
"Utdatafelter som kan sorteres.\n"
|
"Utdatafelter som kan sorteres.\n"
|
||||||
"Tilgjengelige områder: forfattersortering, id, bedømmelse, størrelse, "
|
"Tilgjengelige områder: forfattersortering, id, bedømmelse, størrelse, "
|
||||||
"tidsmerking, tittel.\n"
|
"tidsmerking, tittel.\n"
|
||||||
"Standard: %standard'\n"
|
"Standard: '%default'\n"
|
||||||
"Gjelder: CSV, XML utdataformater"
|
"Gjelder: CSV, XML utdataformater"
|
||||||
|
|
||||||
#: /home/kovid/work/calibre/src/calibre/library/cli.py:121
|
#: /home/kovid/work/calibre/src/calibre/library/cli.py:121
|
||||||
|
Loading…
x
Reference in New Issue
Block a user