MTP driver: Fix infinite loop when connecting to some devices with more than 65K objects in their filesystem. Fixes #2072384 [Calibre consumes all memory then crashes when connecting an MTP device](https://bugs.launchpad.net/calibre/+bug/2072384)

This commit is contained in:
Kovid Goyal 2024-07-06 16:40:05 +05:30
parent 802453a7ec
commit d8a8dd46a9
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@ -9,10 +9,11 @@ import json
import sys import sys
import time import time
import weakref import weakref
from collections import deque from collections import defaultdict, deque
from datetime import datetime from datetime import datetime
from itertools import chain from itertools import chain
from operator import attrgetter from operator import attrgetter
from typing import Dict, Tuple
from calibre import force_unicode, human_readable, prints from calibre import force_unicode, human_readable, prints
from calibre.ebooks import BOOK_EXTENSIONS from calibre.ebooks import BOOK_EXTENSIONS
@ -35,17 +36,22 @@ class ListEntry:
class FileOrFolder: class FileOrFolder:
def __init__(self, entry, fs_cache): def __init__(self, entry, fs_cache: 'FilesystemCache', is_storage: bool = False):
self.object_id = entry['id'] self.object_id = entry['id']
self.is_storage = is_storage
self.is_folder = entry['is_folder'] self.is_folder = entry['is_folder']
self.storage_id = entry['storage_id'] self.storage_id = entry['storage_id']
# self.parent_id is None for storage objects # self.parent_id is None for storage objects
self.parent_id = entry.get('parent_id', None) self.parent_id = entry.get('parent_id', None)
self.persistent_id = entry.get('persistent_id', self.object_id)
n = entry.get('name', None) n = entry.get('name', None)
if not n: if not n:
n = '___' if self.is_storage:
prefix = 'Storage'
else:
prefix = 'Folder' if self.is_folder else 'File'
n = f'{prefix}-{self.persistent_id}'
self.name = force_unicode(n, 'utf-8') self.name = force_unicode(n, 'utf-8')
self.persistent_id = entry.get('persistent_id', self.object_id)
self.size = entry.get('size', 0) self.size = entry.get('size', 0)
md = entry.get('modified', 0) md = entry.get('modified', 0)
try: try:
@ -53,7 +59,7 @@ class FileOrFolder:
self.last_modified = datetime(*(list(md)+[local_tz])) self.last_modified = datetime(*(list(md)+[local_tz]))
else: else:
self.last_modified = datetime.fromtimestamp(md, local_tz) self.last_modified = datetime.fromtimestamp(md, local_tz)
except: except Exception:
self.last_modified = datetime.fromtimestamp(0, local_tz) self.last_modified = datetime.fromtimestamp(0, local_tz)
self.last_mod_string = self.last_modified.strftime('%Y/%m/%d %H:%M') self.last_mod_string = self.last_modified.strftime('%Y/%m/%d %H:%M')
self.last_modified = as_utc(self.last_modified) self.last_modified = as_utc(self.last_modified)
@ -62,34 +68,37 @@ class FileOrFolder:
raise ValueError('Storage id %s not valid for %s, valid values: %s'%(self.storage_id, raise ValueError('Storage id %s not valid for %s, valid values: %s'%(self.storage_id,
entry, fs_cache.all_storage_ids)) entry, fs_cache.all_storage_ids))
if self.parent_id == 0:
self.parent_id = self.storage_id
self.is_hidden = entry.get('is_hidden', False) self.is_hidden = entry.get('is_hidden', False)
self.is_system = entry.get('is_system', False) self.is_system = entry.get('is_system', False)
self.can_delete = entry.get('can_delete', True) self.can_delete = entry.get('can_delete', True)
self.files = [] self.files = []
self.folders = [] self.folders = []
fs_cache.id_map[self.object_id] = self if not self.is_storage:
# storage ids can overlap filesystem object ids. See https://bugs.launchpad.net/bugs/2072384
# so only store actual filesystem object ids in id_map
fs_cache.id_maps[self.storage_id][self.object_id] = self
self.fs_cache = weakref.ref(fs_cache) self.fs_cache = weakref.ref(fs_cache)
self.deleted = False self.deleted = False
if self.storage_id == self.object_id: if self.is_storage:
self.storage_prefix = 'mtp:::%s:::'%self.persistent_id self.storage_prefix = 'mtp:::%s:::'%self.persistent_id
# Ignore non ebook files and AppleDouble files # Ignore non ebook files and AppleDouble files
self.is_ebook = (not self.is_folder and self.is_ebook = (not self.is_folder and not self.is_storage and
self.name.rpartition('.')[-1].lower() in bexts and not self.name.startswith('._')) self.name.rpartition('.')[-1].lower() in bexts and not self.name.startswith('._'))
def __repr__(self): def __repr__(self):
name = 'Folder' if self.is_folder else 'File' if self.is_storage:
name = 'Storage'
else:
name = 'Folder' if self.is_folder else 'File'
try: try:
path = str(self.full_path) path = str(self.full_path)
except: except:
path = '' path = ''
datum = 'size=%s'%(self.size) datum = 'size=%s'%(self.size)
if self.is_folder: if self.is_folder or self.is_storage:
datum = 'children=%s'%(len(self.files) + len(self.folders)) datum = 'children=%s'%(len(self.files) + len(self.folders))
return '%s(id=%s, storage_id=%s, %s, path=%s, modified=%s)'%(name, self.object_id, return '%s(id=%s, storage_id=%s, %s, path=%s, modified=%s)'%(name, self.object_id,
self.storage_id, datum, path, self.last_mod_string) self.storage_id, datum, path, self.last_mod_string)
@ -102,19 +111,27 @@ class FileOrFolder:
return not self.files and not self.folders return not self.files and not self.folders
@property @property
def id_map(self): def id_map(self) -> Dict[int, 'FileOrFolder']:
return self.fs_cache().id_map return self.fs_cache().id_maps[self.storage_id]
@property @property
def parent(self): def parent(self):
return None if self.parent_id is None else self.id_map[self.parent_id] if self.parent_id:
return self.id_map[self.parent_id]
if self.is_storage or self.parent_id is None:
return None
return self.fs_cache().storage(self.storage_id)
@property
def in_root(self):
return self.parent_id is not None and self.parent_id == 0
@property @property
def storage(self): def storage(self):
return self.fs_cache().storage(self.storage_id) return self.fs_cache().storage(self.storage_id)
@property @property
def full_path(self): def full_path(self) -> Tuple[str, ...]:
parts = deque() parts = deque()
parts.append(self.name) parts.append(self.name)
p = self.parent p = self.parent
@ -155,7 +172,7 @@ class FileOrFolder:
def list(self, recurse=False): def list(self, recurse=False):
if not self.is_folder: if not self.is_folder:
parent = self.id_map[self.parent_id] parent = self.parent
yield '/'.join(parent.full_path[1:]), ListEntry(self) yield '/'.join(parent.full_path[1:]), ListEntry(self)
return return
entries = [ListEntry(x) for x in chain(self.folders, self.files)] entries = [ListEntry(x) for x in chain(self.folders, self.files)]
@ -210,36 +227,33 @@ class FilesystemCache:
def __init__(self, all_storage, entries): def __init__(self, all_storage, entries):
self.entries = [] self.entries = []
self.id_map = {} self.id_maps = defaultdict(dict)
self.all_storage_ids = tuple(x['id'] for x in all_storage) self.all_storage_ids = tuple(x['id'] for x in all_storage)
for storage in all_storage: for storage in all_storage:
storage['storage_id'] = storage['id'] storage['storage_id'] = storage['id']
e = FileOrFolder(storage, self) e = FileOrFolder(storage, self, is_storage=True)
self.entries.append(e) self.entries.append(e)
self.entries.sort(key=attrgetter('object_id')) self.entries.sort(key=attrgetter('object_id'))
all_storage_ids = [x.storage_id for x in self.entries] self.all_storage_ids = tuple(x.storage_id for x in self.entries)
self.all_storage_ids = tuple(all_storage_ids)
for entry in entries: for entry in entries:
FileOrFolder(entry, self) FileOrFolder(entry, self)
for item in self.id_map.values(): for id_map in self.id_maps.values():
try: for item in id_map.values():
p = item.parent try:
except KeyError: p = item.parent
# Parent does not exist, set the parent to be the storage except KeyError:
# object # Parent does not exist, set the parent to be the storage
sid = item.storage_id # object
if sid not in all_storage_ids: item.parent_id = 0
sid = all_storage_ids[0] p = item.parent
item.parent_id = sid
p = item.parent
if p is not None: if p is not None:
t = p.folders if item.is_folder else p.files t = p.folders if item.is_folder else p.files
t.append(item) t.append(item)
def dump(self, out=sys.stdout): def dump(self, out=sys.stdout):
for e in self.entries: for e in self.entries:
@ -251,26 +265,37 @@ class FilesystemCache:
return e return e
def iterebooks(self, storage_id): def iterebooks(self, storage_id):
for x in self.id_map.values(): id_map = self.id_maps[storage_id]
if x.storage_id == storage_id and x.is_ebook: for x in id_map.values():
if x.parent_id == storage_id and x.name.lower().endswith('.txt'): if x.is_ebook:
if x.in_root and x.name.lower().endswith('.txt'):
continue # Ignore .txt files in the root continue # Ignore .txt files in the root
yield x yield x
def __len__(self): def __len__(self):
return len(self.id_map) ans = len(self.id_maps)
for id_map in self.id_maps.values():
ans += len(id_map)
return ans
def resolve_mtp_id_path(self, path): def resolve_mtp_id_path(self, path):
if not path.startswith('mtp:::'): if not path.startswith('mtp:::'):
raise ValueError('%s is not a valid MTP path'%path) raise ValueError('%s is not a valid MTP path'%path)
parts = path.split(':::') parts = path.split(':::', 2)
if len(parts) < 3: if len(parts) < 3:
raise ValueError('%s is not a valid MTP path'%path) raise ValueError('%s is not a valid MTP path'%path)
try: try:
object_id = json.loads(parts[1]) object_id = json.loads(parts[1])
except: except Exception:
raise ValueError('%s is not a valid MTP path'%path) raise ValueError('%s is not a valid MTP path'%path)
id_map = {}
path = parts[2]
storage_name = path.partition('/')[0]
for entry in self.entries:
if entry.name == storage_name:
id_map = self.id_maps[entry.storage_id]
break
try: try:
return self.id_map[object_id] return id_map[object_id]
except KeyError: except KeyError:
raise ValueError('No object found with MTP path: %s'%path) raise ValueError('No object found with MTP path: %s'%path)