mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Updated Ming Pao
This commit is contained in:
parent
87b37ac4e2
commit
6a1f2e8dd1
@ -24,6 +24,7 @@ __Date__ = ''
|
|||||||
|
|
||||||
'''
|
'''
|
||||||
Change Log:
|
Change Log:
|
||||||
|
2011/12/01: handle the case where, during txt source parsing, the article content does not start with the special character u'\u3010'
|
||||||
2011/10/21: fix a bug that hi-res img is unavailable in pages parsed from source txt
|
2011/10/21: fix a bug that hi-res img is unavailable in pages parsed from source txt
|
||||||
2011/10/19: fix a bug in txt source parsing
|
2011/10/19: fix a bug in txt source parsing
|
||||||
2011/10/17: disable fetching of premium content, also improved txt source parsing
|
2011/10/17: disable fetching of premium content, also improved txt source parsing
|
||||||
@ -533,12 +534,22 @@ class MPRecipe(BasicNewsRecipe):
|
|||||||
new_raw_html = '<html><head><title>Untitled</title></head><body><div class="images">'
|
new_raw_html = '<html><head><title>Untitled</title></head><body><div class="images">'
|
||||||
next_is_img_txt = False
|
next_is_img_txt = False
|
||||||
title_started = False
|
title_started = False
|
||||||
|
title_break_reached = False
|
||||||
met_article_start_char = False
|
met_article_start_char = False
|
||||||
for item in splitter.split(raw_html):
|
for item in splitter.split(raw_html):
|
||||||
item = item.strip()
|
item = item.strip()
|
||||||
if item.startswith(u'\u3010'):
|
# once the title has started, the first empty item marks the break between title and content; record it in title_break_reached
|
||||||
met_article_start_char = True
|
if title_started == True and title_break_reached == False and item == '':
|
||||||
new_raw_html = new_raw_html + '</div><div class="content"><p>' + item + '<p>\n'
|
title_break_reached = True
|
||||||
|
# if title reached and title_break_reached and met_article_start_char == False and item is not empty
|
||||||
|
# start content
|
||||||
|
elif title_started == True and title_break_reached == True and met_article_start_char == False:
|
||||||
|
if item <> '':
|
||||||
|
met_article_start_char = True
|
||||||
|
new_raw_html = new_raw_html + '</div><div class="content"><p>' + item + '<p>\n'
|
||||||
|
#if item.startswith(u'\u3010'):
|
||||||
|
# met_article_start_char = True
|
||||||
|
# new_raw_html = new_raw_html + '</div><div class="content"><p>' + item + '<p>\n'
|
||||||
else:
|
else:
|
||||||
if next_is_img_txt == False:
|
if next_is_img_txt == False:
|
||||||
if item.startswith("=@"):
|
if item.startswith("=@"):
|
||||||
@ -787,3 +798,4 @@ class MPRecipe(BasicNewsRecipe):
|
|||||||
|
|
||||||
with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
|
with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
|
||||||
opf.render(opf_file, ncx_file)
|
opf.render(opf_file, ncx_file)
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user