Sync to trunk.

This commit is contained in:
John Schember 2010-01-10 08:41:29 -05:00
commit 5f5fd2a2a8
49 changed files with 23573 additions and 12607 deletions

View File

@ -0,0 +1,28 @@
from calibre.web.feeds.news import BasicNewsRecipe
class DallasNews(BasicNewsRecipe):
    """Calibre news recipe for The Dallas Morning News RSS feeds."""

    title = u'The Dallas Morning News'
    language = 'en'
    oldest_article = 2  # days
    max_articles_per_feed = 25

    no_stylesheets = True
    remove_tags_before = dict(name='h2', attrs={'class':'vitstoryheadline'})
    # A previous assignment of remove_tags_after (keyed on the
    # 'width: 100%; clear: right' div) was immediately overwritten by this one
    # and therefore never took effect; only the effective value is kept.
    # That div is still stripped through remove_tags below.
    remove_tags_after = dict(name='div', attrs={'id':'article_tools_bottom'})
    remove_tags = [
        dict(name='iframe'),
        dict(name='div', attrs={'class':'biblockmore'}),
        dict(name='div', attrs={'style':'width: 100%; clear: right'}),
        dict(name='div', attrs={'id':'article_tools_bottom'}),
        #dict(name='ul', attrs={'class':'articleTools'}),
    ]

    # (feed title, feed URL) pairs.
    feeds = [
        ('Latest News', 'http://www.dallasnews.com/newskiosk/rss/dallasnewslatestnews.xml'),
        ('Local News', 'http://www.dallasnews.com/newskiosk/rss/dallasnewslocalnews.xml'),
        ('Nation and World', 'http://www.dallasnews.com/newskiosk/rss/dallasnewsnationworld.xml'),
        ('Politics', 'http://www.dallasnews.com/newskiosk/rss/dallasnewsnationalpolitics.xml'),
        ('Science', 'http://www.dallasnews.com/newskiosk/rss/dallasnewsscience.xml'),
    ]

View File

@ -7,6 +7,7 @@ economist.com
''' '''
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.BeautifulSoup import Tag, NavigableString
import mechanize, string, urllib, time import mechanize, string, urllib, time
@ -103,3 +104,22 @@ class Economist(BasicNewsRecipe):
if not ans: if not ans:
raise Exception('Could not find any articles. Has your subscription expired?') raise Exception('Could not find any articles. Has your subscription expired?')
return ans return ans
def eco_find_image_tables(self, soup):
    """Yield right-aligned tables that look like captioned images.

    A table qualifies when it contains one or two ``font`` tags and
    exactly one ``img`` tag.
    """
    for table in soup.findAll('table', align='right'):
        font_count = len(table.findAll('font'))
        if font_count not in (1, 2):
            continue
        if len(table.findAll('img')) == 1:
            yield table
def postprocess_html(self, soup, first):
    """Replace each image table with a centred div holding caption and image.

    For every table yielded by ``eco_find_image_tables`` a ``div`` is built
    that contains the text of the table's first ``font`` tag, a ``br`` and
    the table's ``img``; the div then takes the table's place in ``soup``.
    Returns the modified ``soup``.
    """
    # Materialise the matches first: the tree is mutated while we loop.
    image_tables = list(self.eco_find_image_tables(soup))
    for image_table in image_tables:
        caption_tag = image_table.find('font')
        image = image_table.find('img')

        wrapper = Tag(soup, 'div')
        wrapper['style'] = 'text-align:center;font-size:70%'
        caption_text = NavigableString(self.tag_to_string(caption_tag))
        line_break = Tag(soup, 'br')

        wrapper.insert(0, caption_text)
        wrapper.insert(1, line_break)
        # Detach the image from the table before re-parenting it.
        image.extract()
        wrapper.insert(2, image)

        image_table.replaceWith(wrapper)
    return soup

View File

@ -1,6 +1,7 @@
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from calibre.utils.threadpool import ThreadPool, makeRequests from calibre.utils.threadpool import ThreadPool, makeRequests
import time from calibre.ebooks.BeautifulSoup import Tag, NavigableString
import time, string
from datetime import datetime from datetime import datetime
from lxml import html from lxml import html
@ -48,7 +49,30 @@ class Economist(BasicNewsRecipe):
for r in requests: pool.putRequest(r) for r in requests: pool.putRequest(r)
pool.wait() pool.wait()
return [(t, a) for t, a in self.feed_dict.items()] return self.eco_sort_sections([(t, a) for t, a in
self.feed_dict.items()])
def eco_sort_sections(self, feeds):
    """Return ``feeds`` sorted by a fixed ranking of section titles.

    ``feeds`` is a sequence of ``(section_title, articles)`` pairs. Titles
    missing from the ranking get rank 100 and therefore sort last. The sort
    is stable, so entries with equal rank keep their incoming order.
    """
    order = {
        'The World This Week': 1,
        'Leaders': 2,
        'Letters': 3,
        'Briefing': 4,
        'Business': 5,
        'Finance And Economics': 6,
        'Science & Technology': 7,
        'Books & Arts': 8,
        'International': 9,
        'United States': 10,
        'Asia': 11,
        'Europe': 12,
        'The Americas': 13,
        'Middle East & Africa': 14,
        'Britain': 15,
        'Obituary': 16,
    }
    # key= gives the same ordering as the old cmp= comparator, computes the
    # rank once per item, and does not depend on the Python-2-only cmp().
    return sorted(feeds, key=lambda feed: order.get(feed[0], 100))
def process_eco_feed_article(self, args): def process_eco_feed_article(self, args):
from calibre import browser from calibre import browser
@ -61,8 +85,8 @@ class Economist(BasicNewsRecipe):
matches = root.xpath('//*[@class = "article-section"]') matches = root.xpath('//*[@class = "article-section"]')
feedtitle = 'Miscellaneous' feedtitle = 'Miscellaneous'
if matches: if matches:
feedtitle = html.tostring(matches[0], method='text', feedtitle = string.capwords(html.tostring(matches[0], method='text',
encoding=unicode) encoding=unicode))
return (i, feedtitle, url, title, description, author, published) return (i, feedtitle, url, title, description, author, published)
def eco_article_found(self, req, result): def eco_article_found(self, req, result):
@ -81,3 +105,22 @@ class Economist(BasicNewsRecipe):
def eco_article_failed(self, req, tb): def eco_article_failed(self, req, tb):
self.log.error('Failed to download %s with error:'%req.args[0][2]) self.log.error('Failed to download %s with error:'%req.args[0][2])
self.log.debug(tb) self.log.debug(tb)
def eco_find_image_tables(self, soup):
    """Generate the right-aligned tables that hold a single captioned image.

    Only tables with one or two ``font`` tags and exactly one ``img`` tag
    are yielded.
    """
    candidates = soup.findAll('table', align='right')
    for candidate in candidates:
        has_caption = len(candidate.findAll('font')) in (1, 2)
        if has_caption and len(candidate.findAll('img')) == 1:
            yield candidate
def postprocess_html(self, soup, first):
    """Swap every image table for a small centred div and return ``soup``.

    The div receives, in order: the text of the table's first ``font`` tag,
    a ``br`` tag, and the table's ``img`` tag.
    """
    # Snapshot the generator output because the loop rewrites the tree.
    for tbl in list(self.eco_find_image_tables(soup)):
        font_tag = tbl.find('font')
        picture = tbl.find('img')

        replacement = Tag(soup, 'div')
        replacement['style'] = 'text-align:center;font-size:70%'

        caption = NavigableString(self.tag_to_string(font_tag))
        replacement.insert(0, caption)
        replacement.insert(1, Tag(soup, 'br'))

        # The image must leave its old parent before joining the new div.
        picture.extract()
        replacement.insert(2, picture)

        tbl.replaceWith(replacement)
    return soup

View File

@ -1,5 +1,4 @@
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
import re import re
class NatureNews(BasicNewsRecipe): class NatureNews(BasicNewsRecipe):
@ -30,15 +29,3 @@ class NatureNews(BasicNewsRecipe):
def get_article_url(self, article): def get_article_url(self, article):
return article.get('id') return article.get('id')
#def preprocess_html(self, soup):
#story = soup.find(name='div', attrs={'id':'contentColumn'})
#td = heading.findParent(name='td')
#td.extract()
#soup = BeautifulSoup('<html><head><title>t</title></head><body></body></html>')
#body = soup.find(name='body')
#body.insert(0, story)
#for x in soup.findAll(name='p', text=lambda x:x and '--&gt;' in x):
#p = x.findParent('p')
#if p is not None:
#p.extract()
#return soup

View File

@ -27,7 +27,7 @@ class NYTimes(BasicNewsRecipe):
'side_tool', 'side_index', 'side_tool', 'side_index',
'relatedArticles', 'relatedTopics', 'adxSponLink']), 'relatedArticles', 'relatedTopics', 'adxSponLink']),
dict(name=['script', 'noscript', 'style'])] dict(name=['script', 'noscript', 'style'])]
encoding = 'cp1252' #encoding = 'cp1252'
no_stylesheets = True no_stylesheets = True
extra_css = 'h1 {font: sans-serif large;}\n.byline {font:monospace;}' extra_css = 'h1 {font: sans-serif large;}\n.byline {font:monospace;}'
@ -118,5 +118,5 @@ class NYTimes(BasicNewsRecipe):
if refresh is None: if refresh is None:
return soup return soup
content = refresh.get('content').partition('=')[2] content = refresh.get('content').partition('=')[2]
raw = self.browser.open('http://www.nytimes.com'+content).read() raw = self.browser.open_novisit('http://www.nytimes.com'+content).read()
return BeautifulSoup(raw.decode('cp1252', 'replace')) return BeautifulSoup(raw.decode('cp1252', 'replace'))

View File

@ -0,0 +1,73 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
class NewZealandHerald(BasicNewsRecipe):
    """Calibre recipe that builds New Zealand Herald feeds from headline pages."""

    title = 'New Zealand Herald'
    __author__ = 'Krittika Goyal'
    description = 'Daily news'
    timefmt = ' [%d %b, %Y]'

    no_stylesheets = True
    remove_tags_before = dict(name='div', attrs={'class':'contentContainer left eight'})
    remove_tags_after = dict(name='div', attrs={'class':'callToAction'})
    remove_tags = [
        dict(name='iframe'),
        dict(name='div', attrs={'class':['sectionHeader', 'tools','callToAction', 'contentContainer right two nopad relatedColumn']}),
        #dict(name='div', attrs={'id':['shareContainer']}),
        #dict(name='form', attrs={'onsubmit':"return verifySearch(this.w,'Keyword, citation, or #author')"}),
        #dict(name='table', attrs={'cellspacing':'0'}),
    ]

    def preprocess_html(self, soup):
        """Remove the first ``table`` from the article, if any, and return ``soup``."""
        table = soup.find('table')
        if table is not None:
            table.extract()
        return soup

    # TO GET ARTICLES IN SECTION
    def nz_parse_section(self, url):
        """Collect the articles listed under the first date heading of a section.

        ``url`` is the section's headline page. Returns a list of dicts with
        the keys ``title``, ``url``, ``description`` and ``date`` (the last
        two are always empty strings). An empty list is returned when the
        page lacks the expected category list or date heading.
        """
        soup = self.index_to_soup(url)
        div = soup.find(attrs={'class':'col-300 categoryList'})
        if div is None:
            # Page layout not as expected: report no articles instead of
            # failing with AttributeError on None.
            self.log('\t\tNo category list found at', url)
            return []
        date = div.find(attrs={'class':'link-list-heading'})
        if date is None:
            self.log('\t\tNo date heading found at', url)
            return []

        current_articles = []
        for x in date.findAllNext(attrs={'class':['linkList', 'link-list-heading']}):
            # The next heading starts an older day's list: stop there.
            if x.get('class') == 'link-list-heading':
                break
            for li in x.findAll('li'):
                a = li.find('a', href=True)
                if a is None:
                    continue
                title = self.tag_to_string(a)
                # Named href so the section ``url`` parameter is not shadowed.
                href = a.get('href', False)
                if not href or not title:
                    continue
                if href.startswith('/'):
                    href = 'http://www.nzherald.co.nz'+href
                self.log('\t\tFound article:', title)
                self.log('\t\t\t', href)
                current_articles.append({'title': title, 'url':href,
                    'description':'', 'date':''})

        return current_articles

    # To GET SECTIONS
    def parse_index(self):
        """Return ``(section title, articles)`` pairs for every non-empty section."""
        feeds = []
        for title, url in [
                ('National',
                 'http://www.nzherald.co.nz/nz/news/headlines.cfm?c_id=1'),
                ('World',
                 'http://www.nzherald.co.nz/world/news/headlines.cfm?c_id=2'),
                ('Politics',
                 'http://www.nzherald.co.nz/politics/news/headlines.cfm?c_id=280'),
                ('Crime',
                 'http://www.nzherald.co.nz/crime/news/headlines.cfm?c_id=30'),
                ('Environment',
                 'http://www.nzherald.co.nz/environment/news/headlines.cfm?c_id=39'),
                ]:
            articles = self.nz_parse_section(url)
            if articles:
                feeds.append((title, articles))
        return feeds

View File

@ -0,0 +1,16 @@
from calibre.web.feeds.news import BasicNewsRecipe
class SGhu(BasicNewsRecipe):
    """Calibre news recipe for the SG.hu RSS feed."""

    # Identification
    title = u'SG.hu'
    __author__ = 'davotibarna'
    description = u'Informatika \xe9s Tudom\xe1ny'
    language = 'hu'

    # Download limits
    oldest_article = 5
    max_articles_per_feed = 100

    # Page handling
    no_stylesheets = True
    encoding = 'ISO-8859-2'

    feeds = [
        (u'SG.hu', u'http://www.sg.hu/plain/rss.xml'),
    ]

    def print_version(self, url):
        """Return ``url`` with 'cikkek/' replaced by the printer page query."""
        article_path = 'cikkek/'
        printer_query = 'printer.php?cid='
        return url.replace(article_path, printer_query)

View File

@ -0,0 +1,59 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'Lorenzo Vigentini'
__copyright__ = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
description = 'the Escapist Magazine - v1.02 (09, January 2010)'
'''
http://www.escapistmagazine.com/
'''
from calibre.web.feeds.news import BasicNewsRecipe
class al(BasicNewsRecipe):
    """Calibre news recipe for the Escapist Magazine RSS feeds."""

    author = 'Lorenzo Vigentini'
    description = 'the Escapist Magazine'

    cover_url = 'http://cdn.themis-media.com/themes/escapistmagazine/default/images/logo.png'
    title = u'the Escapist Magazine'
    publisher = 'Themis media'
    category = 'Video games news, lifestyle, gaming culture'

    language = 'en'
    timefmt = '[%a, %d %b, %Y]'

    oldest_article = 1
    max_articles_per_feed = 100
    use_embedded_content = False
    # NOTE(review): the BasicNewsRecipe option is presumably spelled
    # `recursions`; confirm whether this attribute has any effect.
    recursion = 10

    remove_javascript = True
    no_stylesheets = True

    feeds = [
        (u'Daily News', u'http://www.escapistmagazine.com/rss/news/0.xml'),
        (u'Articles', u'http://www.escapistmagazine.com/rss/articles/0.xml')
    ]

    def print_version(self, url):
        """Map an article URL to its print URL.

        The result is ``<site>/<section>/print/<id>`` where ``<section>`` is
        the first path component of ``url`` and ``<id>`` is the part of the
        last path component before its first '-'. URLs with no path
        component are returned unchanged.
        """
        baseURL = 'http://www.escapistmagazine.com'
        segments = url.split('/')
        if len(segments) < 4:
            # 'scheme://host' only: there is no section to build from.
            return url
        subPath = '/' + segments[3] + '/'
        # Take everything before the first '-' instead of assuming the id is
        # 4 or 5 characters long; this also avoids an IndexError when the
        # last segment is shorter than 5 characters.
        articleID = segments[-1].partition('-')[0]
        return baseURL + subPath + 'print/' + articleID

    keep_only_tags = [
        dict(name='div', attrs={'id':'article'})
    ]

    remove_tags = [
        dict(name='div',attrs={'id':['ad_leaderboard','print_notice','bottom_panel_container']})
    ]

View File

@ -111,6 +111,7 @@ class VMInstaller(Command):
self.vm = self.VM self.vm = self.VM
if not self.vmware_started(): if not self.vmware_started():
self.start_vmware() self.start_vmware()
subprocess.call(['chmod', '-R', '+r', 'resources/recipes'])
self.start_vm() self.start_vm()
self.download_installer() self.download_installer()
if not self.dont_shutdown: if not self.dont_shutdown:

View File

@ -416,6 +416,7 @@ class NookOutput(OutputProfile):
# Screen size is a best guess # Screen size is a best guess
screen_size = (600, 730) screen_size = (600, 730)
comic_screen_size = (584, 730)
dpi = 167 dpi = 167
fbase = 16 fbase = 16
fsizes = [12, 12, 14, 16, 18, 20, 22, 24] fsizes = [12, 12, 14, 16, 18, 20, 22, 24]

View File

@ -187,7 +187,7 @@ class BookList(_BookList):
self.remove_book(name) self.remove_book(name)
node = self.document.createElement(self.prefix + "text") node = self.document.createElement(self.prefix + "text")
mime = MIME_MAP[name.rpartition('.')[-1].lower()] mime = MIME_MAP.get(name.rpartition('.')[-1].lower(), MIME_MAP['epub'])
cid = self.max_id()+1 cid = self.max_id()+1
try: try:
sourceid = str(self[0].sourceid) if len(self) else '1' sourceid = str(self[0].sourceid) if len(self) else '1'

View File

@ -56,7 +56,7 @@ class PRS505(CLI, Device):
EBOOK_DIR_MAIN = 'database/media/books' EBOOK_DIR_MAIN = 'database/media/books'
EXTRA_CUSTOMIZATION_MESSAGE = _('Comma separated list of metadata fields ' EXTRA_CUSTOMIZATION_MESSAGE = _('Comma separated list of metadata fields '
'to turn into collections on the device. Posiibilities include: ')+\ 'to turn into collections on the device. Possibilities include: ')+\
'series, tags, authors' 'series, tags, authors'
EXTRA_CUSTOMIZATION_DEFAULT = ', '.join(['series', 'tags']) EXTRA_CUSTOMIZATION_DEFAULT = ', '.join(['series', 'tags'])

View File

@ -117,9 +117,10 @@ class FB2MLizer(object):
'<book-title>%s</book-title> ' \ '<book-title>%s</book-title> ' \
'</title-info><document-info> ' \ '</title-info><document-info> ' \
'<program-used>%s - %s</program-used></document-info>\n' \ '<program-used>%s - %s</program-used></document-info>\n' \
'</description>\n<body>\n<section>' % (author_first, author_middle, '</description>\n<body>\n<section>' % tuple(map(prepare_string_for_xml,
(author_first, author_middle,
author_last, self.oeb_book.metadata.title[0].value, author_last, self.oeb_book.metadata.title[0].value,
__appname__, __version__) __appname__, __version__)))
def get_cover_page(self): def get_cover_page(self):
output = u'' output = u''

View File

@ -44,13 +44,13 @@ class TocExtension (markdown.Extension):
replaces first string occurence of "///Table of Contents Goes Here///" replaces first string occurence of "///Table of Contents Goes Here///"
""" """
def __init__ (self) : def __init__ (self, configs={}) :
#maybe add these as parameters to the class init? #maybe add these as parameters to the class init?
self.TOC_INCLUDE_MARKER = "///Table of Contents///" self.TOC_INCLUDE_MARKER = "///Table of Contents///"
self.TOC_TITLE = "Table Of Contents" self.TOC_TITLE = "Table Of Contents"
self.auto_toc_heading_type=2 self.auto_toc_heading_type=2
self.toc_heading_type=3 self.toc_heading_type=3
self.configs = configs
def extendMarkdown(self, md, md_globals) : def extendMarkdown(self, md, md_globals) :
# Just insert in the end # Just insert in the end
@ -148,16 +148,22 @@ class TocPostprocessor (markdown.Postprocessor):
def run(self, doc): def run(self, doc):
tocPlaceholder = self.toc.findTocPlaceholder(doc) tocPlaceholder = self.toc.findTocPlaceholder(doc)
tocDiv = self.toc.createTocDiv(doc) if self.toc.configs.get("disable_toc", False):
if tocDiv: if tocPlaceholder:
if tocPlaceholder : tocPlaceholder.parent.replaceChild(tocPlaceholder, "")
# Replace "magic" pattern with toc else:
tocPlaceholder.parent.replaceChild(tocPlaceholder, tocDiv)
else : tocDiv = self.toc.createTocDiv(doc)
# Dump at the end of the DOM
# Probably want to use CSS to position div if tocDiv:
doc.documentElement.appendChild(tocDiv) if tocPlaceholder :
# Replace "magic" pattern with toc
tocPlaceholder.parent.replaceChild(tocPlaceholder, tocDiv)
else :
# Dump at the end of the DOM
# Probably want to use CSS to position div
doc.documentElement.appendChild(tocDiv)
def makeExtension(configs=None) : def makeExtension(configs={}):
return TocExtension() return TocExtension(configs=configs)

View File

@ -468,6 +468,7 @@ class MobiReader(object):
self.processed_html = self.processed_html.replace('\r\n', '\n') self.processed_html = self.processed_html.replace('\r\n', '\n')
self.processed_html = self.processed_html.replace('> <', '>\n<') self.processed_html = self.processed_html.replace('> <', '>\n<')
self.processed_html = self.processed_html.replace('<mbp: ', '<mbp:') self.processed_html = self.processed_html.replace('<mbp: ', '<mbp:')
self.processed_html = re.sub(r'<?xml[^>]*>', '', self.processed_html)
def remove_random_bytes(self, html): def remove_random_bytes(self, html):
return re.sub('\x14|\x15|\x19|\x1c|\x1d|\xef|\x12|\x13|\xec|\x08', return re.sub('\x14|\x15|\x19|\x1c|\x1d|\xef|\x12|\x13|\xec|\x08',
@ -490,6 +491,8 @@ class MobiReader(object):
'xx-large': '6', 'xx-large': '6',
} }
mobi_version = self.book_header.mobi_version mobi_version = self.book_header.mobi_version
for x in root.xpath('//ncx'):
x.getparent().remove(x)
for i, tag in enumerate(root.iter(etree.Element)): for i, tag in enumerate(root.iter(etree.Element)):
tag.attrib.pop('xmlns', '') tag.attrib.pop('xmlns', '')
for x in tag.attrib: for x in tag.attrib:

View File

@ -960,7 +960,7 @@ class Manifest(object):
else: else:
title = _('Unknown') title = _('Unknown')
return self._parse_xhtml(convert_markdown(data, title)) return self._parse_xhtml(convert_markdown(data, title=title))
def _parse_css(self, data): def _parse_css(self, data):

View File

@ -174,7 +174,8 @@ class EbookIterator(object):
plumber.opts, plumber.input_fmt, self.log, plumber.opts, plumber.input_fmt, self.log,
{}, self.base) {}, self.base)
if processed or plumber.input_fmt.lower() in ('pdf', 'rb'): if processed or plumber.input_fmt.lower() in ('pdf', 'rb') and \
not hasattr(self.pathtoopf, 'manifest'):
self.pathtoopf = create_oebbook(self.log, self.pathtoopf, plumber.opts, self.pathtoopf = create_oebbook(self.log, self.pathtoopf, plumber.opts,
plumber.input_plugin) plumber.input_plugin)
if hasattr(self.pathtoopf, 'manifest'): if hasattr(self.pathtoopf, 'manifest'):

View File

@ -15,7 +15,7 @@ pdfreflow, pdfreflow_err = plugins['pdfreflow']
class PDFInput(InputFormatPlugin): class PDFInput(InputFormatPlugin):
name = 'PDF Input' name = 'PDF Input'
author = 'John Schember' author = 'Kovid Goyal and John Schember'
description = 'Convert PDF files to HTML' description = 'Convert PDF files to HTML'
file_types = set(['pdf']) file_types = set(['pdf'])

View File

@ -18,9 +18,52 @@ class Font(object):
self.color = spec.get('color') self.color = spec.get('color')
self.family = spec.get('family') self.family = spec.get('family')
class Text(object): class Column(object):
def __init__(self, text, font_map, opts, log): def __init__(self):
self.left = self.right = self.top = self.bottom = 0
self.width = self.height = 0
self.elements = []
def add(self, elem):
if elem in self.elements: return
self.elements.append(elem)
self.elements.sort(cmp=lambda x,y:cmp(x.bottom,y.bottom))
self.top = self.elements[0].top
self.bottom = self.elements[-1].bottom
self.left, self.right = sys.maxint, 0
for x in self:
self.left = min(self.left, x.left)
self.right = max(self.right, x.right)
self.width, self.height = self.right-self.left, self.bottom-self.top
def __iter__(self):
for x in self.elements:
yield x
class Element(object):
    """Base class giving page elements identity through their ``id`` attribute.

    Subclasses are expected to set ``self.id``. Two elements are equal when
    their ids are equal, and the hash is derived from the id, so elements
    can be used in sets and ``in`` tests.
    """

    def __eq__(self, other):
        try:
            other_id = other.id
        except AttributeError:
            # Not element-like: let Python fall back to its default
            # comparison instead of raising.
            return NotImplemented
        return self.id == other_id

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so define it explicitly to
        # keep the two operators consistent.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __hash__(self):
        return hash(self.id)
class Image(Element):
    """An image element whose geometry is read from an ``img`` node."""

    def __init__(self, img, opts, log, idc):
        """Read position, size and source from ``img``.

        ``idc`` supplies the element id through its ``next()`` method. The
        attributes 'top', 'left', 'rwidth', 'rheight', 'iwidth' and
        'iheight' are converted to floats; 'src' is stored as returned.
        """
        self.opts = opts
        self.log = log
        self.id = idc.next()

        attribute_names = ('top', 'left', 'rwidth', 'rheight', 'iwidth',
                'iheight')
        # Fetch every attribute first, then convert, as the nested map() did.
        raw_values = [img.get(name) for name in attribute_names]
        numbers = [float(value) for value in raw_values]
        (self.top, self.left, self.width, self.height,
                self.iwidth, self.iheight) = numbers

        self.src = img.get('src')
class Text(Element):
def __init__(self, text, font_map, opts, log, idc):
self.id = idc.next()
self.opts, self.log = opts, log self.opts, self.log = opts, log
self.font_map = font_map self.font_map = font_map
self.top, self.left, self.width, self.height = map(float, map(text.get, self.top, self.left, self.width, self.height = map(float, map(text.get,
@ -90,47 +133,6 @@ class Interval(object):
return hash('(%f,%f)'%self.left, self.right) return hash('(%f,%f)'%self.left, self.right)
class HorizontalBox(object):
def __init__(self, base_text):
self.texts = [base_text]
self.bottom = base_text.bottom
self.number_of_columns = None
self.column_map = {}
def append(self, t):
self.texts.append(t)
def sort(self, left_margin, right_margin):
self.texts.sort(cmp=lambda x,y: cmp(x.left, y.left))
self.top, self.bottom = sys.maxint, 0
for t in self.texts:
self.top = min(self.top, t.top)
self.bottom = max(self.bottom, t.bottom)
self.left = self.texts[0].left
self.right = self.texts[-1].right
self.gaps = []
for i, t in enumerate(self.texts[1:]):
gap = Interval(self.texts[i].right, t.left)
if gap.width > 3:
self.gaps.append(gap)
left = Interval(left_margin, self.texts[0].left)
if left.width > 3:
self.gaps.insert(0, left)
right = Interval(self.texts[-1].right, right_margin)
if right.width > 3:
self.gaps.append(right)
def has_intersection_with(self, gap):
for g in self.gaps:
if g.intersection(gap):
return True
return False
def identify_columns(self, column_gaps):
self.number_of_columns = len(column_gaps) + 1
class Page(object): class Page(object):
# Fraction of a character width that two strings have to be apart, # Fraction of a character width that two strings have to be apart,
@ -141,8 +143,10 @@ class Page(object):
# for them to be considered to be part of the same text fragment # for them to be considered to be part of the same text fragment
LINE_FACTOR = 0.4 LINE_FACTOR = 0.4
YFUZZ = 1.5
def __init__(self, page, font_map, opts, log):
def __init__(self, page, font_map, opts, log, idc):
self.opts, self.log = opts, log self.opts, self.log = opts, log
self.font_map = font_map self.font_map = font_map
self.number = int(page.get('number')) self.number = int(page.get('number'))
@ -154,7 +158,7 @@ class Page(object):
self.left_margin, self.right_margin = self.width, 0 self.left_margin, self.right_margin = self.width, 0
for text in page.xpath('descendant::text'): for text in page.xpath('descendant::text'):
self.texts.append(Text(text, self.font_map, self.opts, self.log)) self.texts.append(Text(text, self.font_map, self.opts, self.log, idc))
text = self.texts[-1] text = self.texts[-1]
self.left_margin = min(text.left, self.left_margin) self.left_margin = min(text.left, self.left_margin)
self.right_margin = max(text.right, self.right_margin) self.right_margin = max(text.right, self.right_margin)
@ -162,16 +166,22 @@ class Page(object):
self.textwidth = self.right_margin - self.left_margin self.textwidth = self.right_margin - self.left_margin
self.font_size_stats = {} self.font_size_stats = {}
self.average_text_height = 0
for t in self.texts: for t in self.texts:
if t.font_size not in self.font_size_stats: if t.font_size not in self.font_size_stats:
self.font_size_stats[t.font_size] = 0 self.font_size_stats[t.font_size] = 0
self.font_size_stats[t.font_size] += len(t.text_as_string) self.font_size_stats[t.font_size] += len(t.text_as_string)
self.average_text_height += t.height
self.average_text_height /= len(self.texts)
self.font_size_stats = FontSizeStats(self.font_size_stats) self.font_size_stats = FontSizeStats(self.font_size_stats)
self.coalesce_fragments() self.coalesce_fragments()
#self.identify_columns() self.elements = list(self.texts)
for img in page.xpath('descendant::img'):
self.elements.append(Image(img, self.opts, self.log, idc))
self.elements.sort(cmp=lambda x,y:cmp(x.top, y.top))
def coalesce_fragments(self): def coalesce_fragments(self):
@ -196,46 +206,50 @@ class Page(object):
if match is not None: if match is not None:
self.texts.remove(match) self.texts.remove(match)
def sort_into_horizontal_boxes(self, document_font_size_stats): def first_pass(self):
self.horizontal_boxes = [] self.regions = []
if not self.elements:
return
for i, x in enumerate(self.elements):
x.idx = i
self.current_region = None
processed = set([])
for x in self.elements:
if x in processed: continue
elems = set(self.find_elements_in_row_of(x))
columns = self.sort_into_columns(x, elems)
processed.update(elems)
columns
def find_closest_match(text): def sort_into_columns(self, elem, neighbors):
'Return horizontal box whose bottom is closest to text or None' columns = [Column()]
min, ans = 3.1, None columns[0].add(elem)
for hb in self.horizontal_boxes: for x in neighbors:
diff = abs(text.bottom - hb.bottom) added = False
if diff < min: for c in columns:
diff, ans = min, hb if c.contains(x):
return ans c.add(x)
added = True
for t in self.texts: break
hb = find_closest_match(t) if not added:
if hb is None: columns.append(Column())
self.horizontal_boxes.append(HorizontalBox(t)) columns[-1].add(x)
else: columns.sort(cmp=lambda x,y:cmp(x.left, y.left))
hb.append(t) return columns
for hb in self.horizontal_boxes:
hb.sort(self.left_margin, self.right_margin)
self.horizontal_boxes.sort(cmp=lambda x,y: cmp(x.bottom, y.bottom))
def identify_columns(self):
def neighborhood(i):
if i == len(self.horizontal_boxes)-1:
return self.horizontal_boxes[i-2:i]
if i == len(self.horizontal_boxes)-2:
return (self.horizontal_boxes[i-1], self.horizontal_boxes[i+1])
return self.horizontal_boxes[i+1], self.horizontal_boxes[i+2]
for i, hbox in enumerate(self.horizontal_boxes):
n1, n2 = neighborhood(i)
for gap in hbox.gaps:
gap.is_column_gap = n1.has_intersection_with(gap) and \
n2.has_intersection_with(gap)
def find_elements_in_row_of(self, x):
    """Yield nearby elements that share a row with ``x``.

    Only elements within 15 positions of ``x.idx`` in ``self.elements`` are
    examined. An element is yielded when its vertical extent overlaps the
    fuzzy band around ``x.top`` by more than half the average text height
    and its horizontal extent does not overlap that of ``x``.
    """
    row_band = Interval(x.top - self.YFUZZ * self.average_text_height,
            x.top + self.YFUZZ*(1+self.average_text_height))
    x_extent = Interval(x.left, x.right)
    window_start = max(0, x.idx-15)
    for candidate in self.elements[window_start:x.idx+15]:
        if candidate is x:
            continue
        candidate_rows = Interval(candidate.top, candidate.bottom)
        candidate_cols = Interval(candidate.left, candidate.right)
        vertical_overlap = row_band.intersection(candidate_rows).width
        if not (vertical_overlap > 0.5*self.average_text_height):
            continue
        if candidate_cols.intersection(x_extent).width <= 0:
            yield candidate
class PDFDocument(object): class PDFDocument(object):
@ -244,6 +258,7 @@ class PDFDocument(object):
self.opts, self.log = opts, log self.opts, self.log = opts, log
parser = etree.XMLParser(recover=True) parser = etree.XMLParser(recover=True)
self.root = etree.fromstring(xml, parser=parser) self.root = etree.fromstring(xml, parser=parser)
idc = iter(xrange(sys.maxint))
self.fonts = [] self.fonts = []
self.font_map = {} self.font_map = {}
@ -256,14 +271,15 @@ class PDFDocument(object):
self.page_map = {} self.page_map = {}
for page in self.root.xpath('//page'): for page in self.root.xpath('//page'):
page = Page(page, self.font_map, opts, log) page = Page(page, self.font_map, opts, log, idc)
self.page_map[page.id] = page self.page_map[page.id] = page
self.pages.append(page) self.pages.append(page)
self.collect_font_statistics() self.collect_font_statistics()
for page in self.pages: for page in self.pages:
page.sort_into_horizontal_boxes(self.font_size_stats) page.document_font_stats = self.font_size_stats
page.first_pass()
def collect_font_statistics(self): def collect_font_statistics(self):
self.font_size_stats = {} self.font_size_stats = {}

View File

@ -31,6 +31,8 @@ class TXTInput(InputFormatPlugin):
OptionRecommendation(name='markdown', recommended_value=False, OptionRecommendation(name='markdown', recommended_value=False,
help=_('Run the text input through the markdown pre-processor. To ' help=_('Run the text input through the markdown pre-processor. To '
'learn more about markdown see')+' http://daringfireball.net/projects/markdown/'), 'learn more about markdown see')+' http://daringfireball.net/projects/markdown/'),
OptionRecommendation(name="markdown_disable_toc", recommended_value=False,
help=_('Do not insert a Table of Contents into the output text.')),
]) ])
def convert(self, stream, options, file_ext, log, def convert(self, stream, options, file_ext, log,
@ -50,10 +52,10 @@ class TXTInput(InputFormatPlugin):
if options.markdown: if options.markdown:
log.debug('Running text though markdown conversion...') log.debug('Running text though markdown conversion...')
try: try:
html = convert_markdown(txt) html = convert_markdown(txt, disable_toc=options.markdown_disable_toc)
except RuntimeError: except RuntimeError:
raise ValueError('This txt file has malformed markup, it cannot be' raise ValueError('This txt file has malformed markup, it cannot be'
'converted by calibre. See http://daringfireball.net/projects/markdown/syntax') ' converted by calibre. See http://daringfireball.net/projects/markdown/syntax')
else: else:
html = convert_basic(txt) html = convert_basic(txt)

View File

@ -39,10 +39,11 @@ def convert_basic(txt, title=''):
return HTML_TEMPLATE % (title, '\n'.join(lines)) return HTML_TEMPLATE % (title, '\n'.join(lines))
def convert_markdown(txt, title=''): def convert_markdown(txt, title='', disable_toc=False):
md = markdown.Markdown( md = markdown.Markdown(
extensions=['footnotes', 'tables', 'toc'], extensions=['footnotes', 'tables', 'toc'],
safe_mode=False,) extension_configs={"toc": {"disable_toc": disable_toc}},
safe_mode=False)
return HTML_TEMPLATE % (title, md.convert(txt)) return HTML_TEMPLATE % (title, md.convert(txt))
def separate_paragraphs_single_line(txt): def separate_paragraphs_single_line(txt):

View File

@ -605,9 +605,9 @@ def build_forms(srcdir, info=None):
if form.endswith('viewer%smain.ui'%os.sep): if form.endswith('viewer%smain.ui'%os.sep):
info('\t\tPromoting WebView') info('\t\tPromoting WebView')
dat = dat.replace('self.view = QtWebKit.QWebView(', 'self.view = DocumentView(') dat = dat.replace('self.view = QtWebKit.QWebView(', 'self.view = DocumentView(')
if iswindows: dat = dat.replace('self.view = QWebView(', 'self.view = DocumentView(')
dat = dat.replace('self.view = QWebView(', 'self.view = DocumentView(') dat = dat.replace('from QtWebKit.QWebView import QWebView',
dat = dat.replace('from QtWebKit.QWebView import QWebView', '') 'from PyQt4 import QtWebKit\nfrom PyQt4.QtWebKit import QWebView')
dat += '\n\nfrom calibre.gui2.viewer.documentview import DocumentView' dat += '\n\nfrom calibre.gui2.viewer.documentview import DocumentView'
open(compiled_form, 'wb').write(dat) open(compiled_form, 'wb').write(dat)

View File

@ -14,6 +14,6 @@ class PluginWidget(Widget, Ui_Form):
def __init__(self, parent, get_option, get_help, db=None, book_id=None): def __init__(self, parent, get_option, get_help, db=None, book_id=None):
Widget.__init__(self, parent, 'txt_input', Widget.__init__(self, parent, 'txt_input',
['single_line_paras', 'print_formatted_paras', 'markdown']) ['single_line_paras', 'print_formatted_paras', 'markdown', 'markdown_disable_toc'])
self.db, self.book_id = db, book_id self.db, self.book_id = db, book_id
self.initialize_options(get_option, get_help, db, book_id) self.initialize_options(get_option, get_help, db, book_id)

View File

@ -14,19 +14,6 @@
<string>Form</string> <string>Form</string>
</property> </property>
<layout class="QGridLayout" name="gridLayout"> <layout class="QGridLayout" name="gridLayout">
<item row="4" column="0">
<spacer name="verticalSpacer">
<property name="orientation">
<enum>Qt::Vertical</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>20</width>
<height>213</height>
</size>
</property>
</spacer>
</item>
<item row="0" column="0"> <item row="0" column="0">
<widget class="QCheckBox" name="opt_single_line_paras"> <widget class="QCheckBox" name="opt_single_line_paras">
<property name="text"> <property name="text">
@ -34,6 +21,13 @@
</property> </property>
</widget> </widget>
</item> </item>
<item row="1" column="0">
<widget class="QCheckBox" name="opt_print_formatted_paras">
<property name="text">
<string>Assume print formatting</string>
</property>
</widget>
</item>
<item row="2" column="0"> <item row="2" column="0">
<widget class="QCheckBox" name="opt_markdown"> <widget class="QCheckBox" name="opt_markdown">
<property name="text"> <property name="text">
@ -51,15 +45,45 @@
</property> </property>
</widget> </widget>
</item> </item>
<item row="1" column="0"> <item row="4" column="0">
<widget class="QCheckBox" name="opt_print_formatted_paras"> <widget class="QCheckBox" name="opt_markdown_disable_toc">
<property name="text"> <property name="text">
<string>Assume print formatting</string> <string>Do not insert Table of Contents into output text when using markdown</string>
</property> </property>
</widget> </widget>
</item> </item>
<item row="5" column="0">
<spacer name="verticalSpacer">
<property name="orientation">
<enum>Qt::Vertical</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>20</width>
<height>213</height>
</size>
</property>
</spacer>
</item>
</layout> </layout>
</widget> </widget>
<resources/> <resources/>
<connections/> <connections>
<connection>
<sender>opt_markdown</sender>
<signal>toggled(bool)</signal>
<receiver>opt_markdown_disable_toc</receiver>
<slot>setEnabled(bool)</slot>
<hints>
<hint type="sourcelabel">
<x>76</x>
<y>80</y>
</hint>
<hint type="destinationlabel">
<x>418</x>
<y>105</y>
</hint>
</hints>
</connection>
</connections>
</ui> </ui>

View File

@ -6,6 +6,7 @@ from PyQt4.QtGui import QDialog
from calibre.gui2.dialogs.tag_editor_ui import Ui_TagEditor from calibre.gui2.dialogs.tag_editor_ui import Ui_TagEditor
from calibre.gui2 import qstring_to_unicode from calibre.gui2 import qstring_to_unicode
from calibre.gui2 import question_dialog, error_dialog from calibre.gui2 import question_dialog, error_dialog
from calibre.constants import islinux
class TagEditor(QDialog, Ui_TagEditor): class TagEditor(QDialog, Ui_TagEditor):
@ -42,7 +43,8 @@ class TagEditor(QDialog, Ui_TagEditor):
self.connect(self.add_tag_button, SIGNAL('clicked()'), self.add_tag) self.connect(self.add_tag_button, SIGNAL('clicked()'), self.add_tag)
self.connect(self.delete_button, SIGNAL('clicked()'), self.delete_tags) self.connect(self.delete_button, SIGNAL('clicked()'), self.delete_tags)
self.connect(self.add_tag_input, SIGNAL('returnPressed()'), self.add_tag) self.connect(self.add_tag_input, SIGNAL('returnPressed()'), self.add_tag)
self.connect(self.available_tags, SIGNAL('itemActivated(QListWidgetItem*)'), self.apply_tags) if not islinux:
self.connect(self.available_tags, SIGNAL('itemActivated(QListWidgetItem*)'), self.apply_tags)
self.connect(self.applied_tags, SIGNAL('itemActivated(QListWidgetItem*)'), self.unapply_tags) self.connect(self.applied_tags, SIGNAL('itemActivated(QListWidgetItem*)'), self.unapply_tags)

View File

@ -576,6 +576,13 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
self.location_view.setCurrentIndex(self.location_view.model().index(0)) self.location_view.setCurrentIndex(self.location_view.model().index(0))
if self.cover_flow is not None and dynamic.get('cover_flow_visible', False):
self.status_bar.cover_flow_button.toggle()
if dynamic.get('tag_view_visible', False):
self.status_bar.tag_view_button.toggle()
def resizeEvent(self, ev): def resizeEvent(self, ev):
MainWindow.resizeEvent(self, ev) MainWindow.resizeEvent(self, ev)
self.search.setMaximumWidth(self.width()-150) self.search.setMaximumWidth(self.width()-150)
@ -1837,6 +1844,8 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
def write_settings(self): def write_settings(self):
config.set('main_window_geometry', self.saveGeometry()) config.set('main_window_geometry', self.saveGeometry())
dynamic.set('sort_column', self.library_view.model().sorted_on) dynamic.set('sort_column', self.library_view.model().sorted_on)
dynamic.set('tag_view_visible', self.tags_view.isVisible())
dynamic.set('cover_flow_visible', self.cover_flow.isVisible())
self.library_view.write_settings() self.library_view.write_settings()
if self.device_connected: if self.device_connected:
self.save_device_view_settings() self.save_device_view_settings()

View File

@ -924,7 +924,10 @@ class LibraryDatabase2(LibraryDatabase):
fmt_path = os.path.join(path, name+format) fmt_path = os.path.join(path, name+format)
if os.path.exists(fmt_path): if os.path.exists(fmt_path):
return fmt_path return fmt_path
candidates = glob.glob(os.path.join(path, '*'+format)) try:
candidates = glob.glob(os.path.join(path, '*'+format))
except: # If path contains strange characters this throws an exc
candidates = []
if format and candidates and os.path.exists(candidates[0]): if format and candidates and os.path.exists(candidates[0]):
shutil.copyfile(candidates[0], fmt_path) shutil.copyfile(candidates[0], fmt_path)
return fmt_path return fmt_path
@ -1122,10 +1125,18 @@ class LibraryDatabase2(LibraryDatabase):
self.set_path(id, True) self.set_path(id, True)
self.notify('metadata', [id]) self.notify('metadata', [id])
def set_metadata(self, id, mi): def set_metadata(self, id, mi, ignore_errors=False):
''' '''
Set metadata for the book `id` from the `MetaInformation` object `mi` Set metadata for the book `id` from the `MetaInformation` object `mi`
''' '''
def doit(func, *args, **kwargs):
try:
func(*args, **kwargs)
except:
if ignore_errors:
traceback.print_exc()
else:
raise
if mi.title: if mi.title:
self.set_title(id, mi.title) self.set_title(id, mi.title)
if not mi.authors: if not mi.authors:
@ -1135,29 +1146,29 @@ class LibraryDatabase2(LibraryDatabase):
authors += string_to_authors(a) authors += string_to_authors(a)
self.set_authors(id, authors, notify=False) self.set_authors(id, authors, notify=False)
if mi.author_sort: if mi.author_sort:
self.set_author_sort(id, mi.author_sort, notify=False) doit(self.set_author_sort, id, mi.author_sort, notify=False)
if mi.publisher: if mi.publisher:
self.set_publisher(id, mi.publisher, notify=False) doit(self.set_publisher, id, mi.publisher, notify=False)
if mi.rating: if mi.rating:
self.set_rating(id, mi.rating, notify=False) doit(self.set_rating, id, mi.rating, notify=False)
if mi.series: if mi.series:
self.set_series(id, mi.series, notify=False) doit(self.set_series, id, mi.series, notify=False)
if mi.cover_data[1] is not None: if mi.cover_data[1] is not None:
self.set_cover(id, mi.cover_data[1]) doit(self.set_cover, id, mi.cover_data[1])
elif mi.cover is not None and os.access(mi.cover, os.R_OK): elif mi.cover is not None and os.access(mi.cover, os.R_OK):
self.set_cover(id, open(mi.cover, 'rb').read()) doit(self.set_cover, id, open(mi.cover, 'rb'))
if mi.tags: if mi.tags:
self.set_tags(id, mi.tags, notify=False) doit(self.set_tags, id, mi.tags, notify=False)
if mi.comments: if mi.comments:
self.set_comment(id, mi.comments, notify=False) doit(self.set_comment, id, mi.comments, notify=False)
if mi.isbn and mi.isbn.strip(): if mi.isbn and mi.isbn.strip():
self.set_isbn(id, mi.isbn, notify=False) doit(self.set_isbn, id, mi.isbn, notify=False)
if mi.series_index: if mi.series_index:
self.set_series_index(id, mi.series_index, notify=False) doit(self.set_series_index, id, mi.series_index, notify=False)
if mi.pubdate: if mi.pubdate:
self.set_pubdate(id, mi.pubdate, notify=False) doit(self.set_pubdate, id, mi.pubdate, notify=False)
if getattr(mi, 'timestamp', None) is not None: if getattr(mi, 'timestamp', None) is not None:
self.set_timestamp(id, mi.timestamp, notify=False) doit(self.set_timestamp, id, mi.timestamp, notify=False)
self.set_path(id, True) self.set_path(id, True)
self.notify('metadata', [id]) self.notify('metadata', [id])
@ -1353,7 +1364,10 @@ class LibraryDatabase2(LibraryDatabase):
def set_series_index(self, id, idx, notify=True): def set_series_index(self, id, idx, notify=True):
if idx is None: if idx is None:
idx = 1.0 idx = 1.0
idx = float(idx) try:
idx = float(idx)
except:
idx = 1.0
self.conn.execute('UPDATE books SET series_index=? WHERE id=?', (idx, id)) self.conn.execute('UPDATE books SET series_index=? WHERE id=?', (idx, id))
self.conn.commit() self.conn.commit()
self.data.set(id, FIELD_MAP['series_index'], idx, row_is_id=True) self.data.set(id, FIELD_MAP['series_index'], idx, row_is_id=True)
@ -1513,7 +1527,7 @@ class LibraryDatabase2(LibraryDatabase):
id = obj.lastrowid id = obj.lastrowid
self.data.books_added([id], self) self.data.books_added([id], self)
self.set_path(id, True) self.set_path(id, True)
self.set_metadata(id, mi) self.set_metadata(id, mi, ignore_errors=True)
for path in formats: for path in formats:
ext = os.path.splitext(path)[1][1:].lower() ext = os.path.splitext(path)[1][1:].lower()
if ext == 'opf': if ext == 'opf':

View File

@ -79,7 +79,7 @@ class LibraryServer(object):
</book> </book>
''') ''')
MOBILE_UA = re.compile('(?i)(?:iPhone|Opera Mini|NetFront|webOS|Mobile|Android|imode|DoCoMo|Minimo|Blackberry|MIDP|Symbian)') MOBILE_UA = re.compile('(?i)(?:iPhone|Opera Mini|NetFront|webOS|Mobile|Android|imode|DoCoMo|Minimo|Blackberry|MIDP|Symbian|HD2)')
MOBILE_BOOK = textwrap.dedent('''\ MOBILE_BOOK = textwrap.dedent('''\
<tr xmlns:py="http://genshi.edgewall.org/"> <tr xmlns:py="http://genshi.edgewall.org/">
@ -90,7 +90,7 @@ class LibraryServer(object):
<py:for each="format in r[13].split(',')"> <py:for each="format in r[13].split(',')">
<span class="button"><a href="/get/${format}/${authors}-${r[1]}_${r[0]}.${format}">${format.lower()}</a></span>&nbsp; <span class="button"><a href="/get/${format}/${authors}-${r[1]}_${r[0]}.${format}">${format.lower()}</a></span>&nbsp;
</py:for> </py:for>
${r[1]} by ${authors} - ${r[6]/1024}k - ${r[3] if r[3] else ''} ${pubdate} ${'['+r[7]+']' if r[7] else ''} ${r[1]}${(' ['+r[9]+'-'+r[10]+']') if r[9] else ''} by ${authors} - ${r[6]/1024}k - ${r[3] if r[3] else ''} ${pubdate} ${'['+r[7]+']' if r[7] else ''}
</td> </td>
</tr> </tr>
''') ''')
@ -802,7 +802,7 @@ class LibraryServer(object):
@expose @expose
def get(self, what, id): def get(self, what, id, *args, **kwargs):
'Serves files, covers, thumbnails from the calibre database' 'Serves files, covers, thumbnails from the calibre database'
try: try:
id = int(id) id = int(id)

View File

@ -140,11 +140,11 @@ First install the Stanza reader on your iPhone using iTunes.
* Convert the books you want to read on your iPhone to EPUB format by selecting them and clicking the Convert button. * Convert the books you want to read on your iPhone to EPUB format by selecting them and clicking the Convert button.
* Turn on the Content Server in |app|'s preferences and leave |app| running. * Turn on the Content Server in |app|'s preferences and leave |app| running.
Now you should be able to access your books on your iPhone by opening Stanza and going to "Shared Books". Under Shared Books you will see an entry "Book in calibre". If you don't, make sure your iPhone is connected using the WiFi network in your house, not 3G. If the |app| catalog is still not detected in Stanza, you can add it manually in Stanza, by clicking "Online Catalog" and then clicking the plus icon in the lower right corner to add a new catalog. In the Add Catalog screen enter whatever name you like and in the URL field, enter the following:: Now you should be able to access your books on your iPhone by opening Stanza. Go to "Get Books" and then click the "Shared" tab. Under Shared you will see an entry "Books in calibre". If you don't, make sure your iPhone is connected using the WiFi network in your house, not 3G. If the |app| catalog is still not detected in Stanza, you can add it manually in Stanza. To do this, click the "Shared" tab, then click the "Edit" button and then click "Add book source" to add a new book source. In the Add Book Source screen enter whatever name you like and in the URL field, enter the following::
http://192.168.1.2:8080/ http://192.168.1.2:8080/
Replace ``192.168.1.2`` with the local IP address of the computer running |app|. If you have changed the port the |app| content server is running on, you will have to change ``8080`` as well to the new port. The local IP address is the IP address your computer is assigned on your home network. A quick Google search will tell you how to find out your local IP address. Replace ``192.168.1.2`` with the local IP address of the computer running |app|. If you have changed the port the |app| content server is running on, you will have to change ``8080`` as well to the new port. The local IP address is the IP address your computer is assigned on your home network. A quick Google search will tell you how to find out your local IP address. Now click "Save" and you are done.
How do I use |app| with my Android phone? How do I use |app| with my Android phone?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -191,9 +191,9 @@ class RecursiveFetcher(object):
if isinstance(url, unicode): if isinstance(url, unicode):
url = url.encode('utf-8') url = url.encode('utf-8')
# Not sure if this is really needed as I think mechanize # Not sure if this is really needed as I think mechanize
# handles quoting automatically, but leaving it in # handles quoting automatically, but leaving it
# in case it breaks something # in case it breaks something
if re.search(r'\s+|,', url) is not None: if re.search(r'\s+', url) is not None:
purl = list(urlparse.urlparse(url)) purl = list(urlparse.urlparse(url))
for i in range(2, 6): for i in range(2, 6):
purl[i] = quote(purl[i]) purl[i] = quote(purl[i])