mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Merge from trunk
This commit is contained in:
commit
b2dceb1a7a
@ -1,6 +1,3 @@
|
|||||||
'''
|
|
||||||
dnaindia.com
|
|
||||||
'''
|
|
||||||
import re
|
import re
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
@ -12,6 +9,10 @@ class DNAIndia(BasicNewsRecipe):
|
|||||||
language = 'en_IN'
|
language = 'en_IN'
|
||||||
|
|
||||||
encoding = 'cp1252'
|
encoding = 'cp1252'
|
||||||
|
use_embedded_content = False
|
||||||
|
|
||||||
|
no_stylesheets = True
|
||||||
|
auto_cleanup = True
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
('Top News', 'http://www.dnaindia.com/syndication/rss_topnews.xml'),
|
('Top News', 'http://www.dnaindia.com/syndication/rss_topnews.xml'),
|
||||||
@ -22,15 +23,10 @@ class DNAIndia(BasicNewsRecipe):
|
|||||||
('World', 'http://www.dnaindia.com/syndication/rss,catid-9.xml'),
|
('World', 'http://www.dnaindia.com/syndication/rss,catid-9.xml'),
|
||||||
('Money', 'http://www.dnaindia.com/syndication/rss,catid-4.xml'),
|
('Money', 'http://www.dnaindia.com/syndication/rss,catid-4.xml'),
|
||||||
('Sports', 'http://www.dnaindia.com/syndication/rss,catid-6.xml'),
|
('Sports', 'http://www.dnaindia.com/syndication/rss,catid-6.xml'),
|
||||||
('After Hours', 'http://www.dnaindia.com/syndication/rss,catid-7.xml'),
|
('After Hours', 'http://www.dnaindia.com/syndication/rss,catid-7.xml')
|
||||||
('Digital Life', 'http://www.dnaindia.com/syndication/rss,catid-1089741.xml'),
|
|
||||||
]
|
]
|
||||||
remove_tags = [{'id':['footer', 'lhs-col']}, {'class':['bottom', 'categoryHead',
|
|
||||||
'article_tools']}]
|
|
||||||
keep_only_tags = dict(id='middle-col')
|
|
||||||
remove_tags_after=[dict(attrs={'id':'story'})]
|
|
||||||
remove_attributes=['style']
|
|
||||||
no_stylesheets = True
|
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
match = re.search(r'newsid=(\d+)', url)
|
match = re.search(r'newsid=(\d+)', url)
|
||||||
|
@ -9,11 +9,16 @@ class TimesOfIndia(BasicNewsRecipe):
|
|||||||
max_articles_per_feed = 25
|
max_articles_per_feed = 25
|
||||||
|
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
keep_only_tags = [{'class':['maintable12', 'prttabl']}]
|
remove_attributes = ['style']
|
||||||
|
keep_only_tags = [
|
||||||
|
{'class':re.compile(r'maintable12|prttabl')},
|
||||||
|
{'id':['mod-article-header',
|
||||||
|
'mod-a-body-after-first-para', 'mod-a-body-first-para']},
|
||||||
|
]
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(style=lambda x: x and 'float' in x),
|
{'class':re.compile('tabsintbgshow|prvnxtbg')},
|
||||||
{'class':['prvnxtbg', 'footbdrin', 'bcclftr']},
|
{'id':['fbrecommend', 'relmaindiv']}
|
||||||
]
|
]
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
('Top Stories',
|
('Top Stories',
|
||||||
@ -41,6 +46,8 @@ class TimesOfIndia(BasicNewsRecipe):
|
|||||||
]
|
]
|
||||||
|
|
||||||
def get_article_url(self, article):
|
def get_article_url(self, article):
|
||||||
|
# Times of India sometimes serves an ad page instead of the article,
|
||||||
|
# this code, detects and circumvents that
|
||||||
url = BasicNewsRecipe.get_article_url(self, article)
|
url = BasicNewsRecipe.get_article_url(self, article)
|
||||||
if '/0Ltimesofindia' in url:
|
if '/0Ltimesofindia' in url:
|
||||||
url = url.partition('/0L')[-1]
|
url = url.partition('/0L')[-1]
|
||||||
@ -61,6 +68,3 @@ class TimesOfIndia(BasicNewsRecipe):
|
|||||||
|
|
||||||
return url
|
return url
|
||||||
|
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
|
||||||
return soup
|
|
||||||
|
@ -73,6 +73,20 @@ class Worker(Thread): # Get details {{{
|
|||||||
8: ['août'],
|
8: ['août'],
|
||||||
9: ['sept'],
|
9: ['sept'],
|
||||||
12: ['déc'],
|
12: ['déc'],
|
||||||
|
},
|
||||||
|
'es': {
|
||||||
|
1: ['enero'],
|
||||||
|
2: ['febrero'],
|
||||||
|
3: ['marzo'],
|
||||||
|
4: ['abril'],
|
||||||
|
5: ['mayo'],
|
||||||
|
6: ['junio'],
|
||||||
|
7: ['julio'],
|
||||||
|
8: ['agosto'],
|
||||||
|
9: ['septiembre', 'setiembre'],
|
||||||
|
10: ['octubre'],
|
||||||
|
11: ['noviembre'],
|
||||||
|
12: ['diciembre'],
|
||||||
},
|
},
|
||||||
'jp': {
|
'jp': {
|
||||||
1: [u'1月'],
|
1: [u'1月'],
|
||||||
@ -101,13 +115,16 @@ class Worker(Thread): # Get details {{{
|
|||||||
text()="Dettagli prodotto" or \
|
text()="Dettagli prodotto" or \
|
||||||
text()="Product details" or \
|
text()="Product details" or \
|
||||||
text()="Détails sur le produit" or \
|
text()="Détails sur le produit" or \
|
||||||
|
text()="Detalles del producto" or \
|
||||||
text()="登録情報"]/../div[@class="content"]
|
text()="登録情報"]/../div[@class="content"]
|
||||||
'''
|
'''
|
||||||
|
# Editor: is for Spanish
|
||||||
self.publisher_xpath = '''
|
self.publisher_xpath = '''
|
||||||
descendant::*[starts-with(text(), "Publisher:") or \
|
descendant::*[starts-with(text(), "Publisher:") or \
|
||||||
starts-with(text(), "Verlag:") or \
|
starts-with(text(), "Verlag:") or \
|
||||||
starts-with(text(), "Editore:") or \
|
starts-with(text(), "Editore:") or \
|
||||||
starts-with(text(), "Editeur") or \
|
starts-with(text(), "Editeur") or \
|
||||||
|
starts-with(text(), "Editor:") or \
|
||||||
starts-with(text(), "出版社:")]
|
starts-with(text(), "出版社:")]
|
||||||
'''
|
'''
|
||||||
self.language_xpath = '''
|
self.language_xpath = '''
|
||||||
@ -116,12 +133,14 @@ class Worker(Thread): # Get details {{{
|
|||||||
or text() = "Language" \
|
or text() = "Language" \
|
||||||
or text() = "Sprache:" \
|
or text() = "Sprache:" \
|
||||||
or text() = "Lingua:" \
|
or text() = "Lingua:" \
|
||||||
|
or text() = "Idioma:" \
|
||||||
or starts-with(text(), "Langue") \
|
or starts-with(text(), "Langue") \
|
||||||
or starts-with(text(), "言語") \
|
or starts-with(text(), "言語") \
|
||||||
]
|
]
|
||||||
'''
|
'''
|
||||||
|
|
||||||
self.ratings_pat = re.compile(
|
self.ratings_pat = re.compile(
|
||||||
r'([0-9.]+) ?(out of|von|su|étoiles sur|つ星のうち) ([\d\.]+)( (stars|Sternen|stelle)){0,1}')
|
r'([0-9.]+) ?(out of|von|su|étoiles sur|つ星のうち|de un máximo de) ([\d\.]+)( (stars|Sternen|stelle|estrellas)){0,1}')
|
||||||
|
|
||||||
lm = {
|
lm = {
|
||||||
'eng': ('English', 'Englisch'),
|
'eng': ('English', 'Englisch'),
|
||||||
@ -143,6 +162,7 @@ class Worker(Thread): # Get details {{{
|
|||||||
for i, vals in self.months.iteritems():
|
for i, vals in self.months.iteritems():
|
||||||
for x in vals:
|
for x in vals:
|
||||||
ans = ans.replace(x, self.english_months[i])
|
ans = ans.replace(x, self.english_months[i])
|
||||||
|
ans = ans.replace(' de ', ' ')
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
@ -422,6 +442,7 @@ class Amazon(Source):
|
|||||||
'uk' : _('UK'),
|
'uk' : _('UK'),
|
||||||
'it' : _('Italy'),
|
'it' : _('Italy'),
|
||||||
'jp' : _('Japan'),
|
'jp' : _('Japan'),
|
||||||
|
'es' : _('Spain'),
|
||||||
}
|
}
|
||||||
|
|
||||||
options = (
|
options = (
|
||||||
@ -789,6 +810,16 @@ if __name__ == '__main__': # tests {{{
|
|||||||
),
|
),
|
||||||
] # }}}
|
] # }}}
|
||||||
|
|
||||||
|
es_tests = [ # {{{
|
||||||
|
(
|
||||||
|
{'identifiers':{'isbn': '8483460831'}},
|
||||||
|
[title_test('Tiempos Interesantes',
|
||||||
|
exact=True), authors_test(['Terry Pratchett'])
|
||||||
|
]
|
||||||
|
|
||||||
|
),
|
||||||
|
] # }}}
|
||||||
|
|
||||||
jp_tests = [ # {{{
|
jp_tests = [ # {{{
|
||||||
( # isbn -> title, authors
|
( # isbn -> title, authors
|
||||||
{'identifiers':{'isbn': '9784101302720' }},
|
{'identifiers':{'isbn': '9784101302720' }},
|
||||||
@ -804,6 +835,6 @@ if __name__ == '__main__': # tests {{{
|
|||||||
] # }}}
|
] # }}}
|
||||||
|
|
||||||
test_identify_plugin(Amazon.name, com_tests)
|
test_identify_plugin(Amazon.name, com_tests)
|
||||||
#test_identify_plugin(Amazon.name, jp_tests)
|
#test_identify_plugin(Amazon.name, es_tests)
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
|
@ -173,6 +173,10 @@ class PDFWriter(QObject): # {{{
|
|||||||
printer.setOutputFormat(QPrinter.NativeFormat)
|
printer.setOutputFormat(QPrinter.NativeFormat)
|
||||||
self.view.print_(printer)
|
self.view.print_(printer)
|
||||||
printer.abort()
|
printer.abort()
|
||||||
|
else:
|
||||||
|
# The document is so corrupt that we can't render the page.
|
||||||
|
self.loop.exit(0)
|
||||||
|
raise Exception('Document cannot be rendered.')
|
||||||
self._render_book()
|
self._render_book()
|
||||||
|
|
||||||
def _delete_tmpdir(self):
|
def _delete_tmpdir(self):
|
||||||
@ -207,11 +211,14 @@ class PDFWriter(QObject): # {{{
|
|||||||
try:
|
try:
|
||||||
outPDF = PdfFileWriter(title=self.metadata.title, author=self.metadata.author)
|
outPDF = PdfFileWriter(title=self.metadata.title, author=self.metadata.author)
|
||||||
for item in self.combine_queue:
|
for item in self.combine_queue:
|
||||||
with open(item, 'rb') as item_stream:
|
# The input PDF stream must remain open until the final PDF
|
||||||
inputPDF = PdfFileReader(item_stream)
|
# is written to disk. PyPDF references pages added to the
|
||||||
for page in inputPDF.pages:
|
# final PDF from the input PDF on disk. It does not store
|
||||||
outPDF.addPage(page)
|
# the pages in memory so we can't close the input PDF.
|
||||||
outPDF.write(self.out_stream)
|
inputPDF = PdfFileReader(open(item, 'rb'))
|
||||||
|
for page in inputPDF.pages:
|
||||||
|
outPDF.addPage(page)
|
||||||
|
outPDF.write(self.out_stream)
|
||||||
finally:
|
finally:
|
||||||
self._delete_tmpdir()
|
self._delete_tmpdir()
|
||||||
self.loop.exit(0)
|
self.loop.exit(0)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user