CofeehousePy/deps/scikit-image/skimage/io/collection.py

460 lines
15 KiB
Python

"""Data structures to hold collections of images, with optional caching."""
import os
from glob import glob
import re
from collections.abc import Sequence
from copy import copy
import numpy as np
from PIL import Image, __version__ as pil_version
# Check CVE-2020-10379
from distutils.version import LooseVersion
if LooseVersion(pil_version) < LooseVersion('7.1.0'):
    # Only outdated installations ever need the warning helper, so it is
    # imported here rather than unconditionally.
    from warnings import warn

    # Advisory only: the module keeps importing, the user is just told that
    # the listed CVEs were fixed in Pillow 7.1.0.
    warn(
        'Your installed pillow version is < 7.1.0. Several security issues '
        '(CVE-2020-11538, CVE-2020-10379, CVE-2020-10994, CVE-2020-10177) '
        'have been fixed in pillow 7.1.0 or higher. We recommend to upgrade '
        'this library.',
        stacklevel=2,
    )
from tifffile import TiffFile
__all__ = ['MultiImage', 'ImageCollection', 'concatenate_images',
'imread_collection_wrapper']
def concatenate_images(ic):
    """Concatenate all images in the image collection into an array.

    Parameters
    ----------
    ic : an iterable of images
        The images to be concatenated.

    Returns
    -------
    array_cat : ndarray
        An array having one more dimension than the images in `ic`.

    See Also
    --------
    ImageCollection.concatenate, MultiImage.concatenate

    Raises
    ------
    ValueError
        If images in `ic` don't have identical shapes, or if `ic` yields no
        images at all. The original NumPy error is attached as the
        exception's ``__cause__`` so the precise reason stays visible.

    Notes
    -----
    ``concatenate_images`` receives any iterable object containing images,
    including ImageCollection and MultiImage, and returns a NumPy array.
    """
    # Give every image a leading length-1 axis; joining along that axis
    # stacks the images into one array.
    all_images = [image[np.newaxis, ...] for image in ic]
    try:
        array_cat = np.concatenate(all_images)
    except ValueError as err:
        # Keep the historical message but chain the underlying error so
        # callers can tell a shape mismatch from an empty collection.
        raise ValueError('Image dimensions must agree.') from err
    return array_cat
def alphanumeric_key(s):
    """Convert string to list of strings and ints that gives intuitive sorting.

    Parameters
    ----------
    s : string

    Returns
    -------
    k : a list of strings and ints
        Text and number chunks of `s`, strictly alternating and always
        starting with a (possibly empty) text chunk.

    Examples
    --------
    >>> alphanumeric_key('z23a')
    ['z', 23, 'a']
    >>> filenames = ['f9.10.png', 'e10.png', 'f9.9.png', 'f10.10.png',
    ...              'f10.9.png']
    >>> sorted(filenames)
    ['e10.png', 'f10.10.png', 'f10.9.png', 'f9.10.png', 'f9.9.png']
    >>> sorted(filenames, key=alphanumeric_key)
    ['e10.png', 'f9.9.png', 'f9.10.png', 'f10.9.png', 'f10.10.png']
    """
    # Splitting on a capturing group yields text, digits, text, digits, ...
    # so the odd positions are exactly the ``[0-9]+`` matches.
    chunks = re.split('([0-9]+)', s)
    # Decide by position rather than ``str.isdigit()``: isdigit() is also
    # true for characters such as superscript digits that ``int()`` rejects,
    # and position-based typing keeps keys comparable element by element.
    k = [int(chunk) if pos % 2 else chunk for pos, chunk in enumerate(chunks)]
    return k
def _is_multipattern(input_pattern):
"""Helping function. Returns True if pattern contains a tuple, list, or a
string separated with os.pathsep."""
# Conditions to be accepted by ImageCollection:
has_str_ospathsep = (isinstance(input_pattern, str)
and os.pathsep in input_pattern)
not_a_string = not isinstance(input_pattern, str)
has_iterable = isinstance(input_pattern, Sequence)
has_strings = all(isinstance(pat, str) for pat in input_pattern)
is_multipattern = (has_str_ospathsep or
(not_a_string
and has_iterable
and has_strings))
return is_multipattern
class ImageCollection(object):
    """Load and manage a collection of image files.

    Parameters
    ----------
    load_pattern : str or list of str
        Pattern string or list of strings to load. The filename path can be
        absolute or relative. A single string may hold several patterns
        separated by ``os.pathsep``.
    conserve_memory : bool, optional
        If True, `ImageCollection` does not keep more than one image in
        memory at a specific time. Otherwise, images will be cached once
        they are loaded.

    Other parameters
    ----------------
    load_func : callable
        ``imread`` by default. See notes below.
    **load_func_kwargs : dict
        Extra keyword arguments passed to `load_func` on every load.

    Attributes
    ----------
    files : list of str
        The files matched by `load_pattern`, in alphanumerical order. For a
        collection obtained by slicing, the files of the selected images.

    Notes
    -----
    Note that files are always returned in alphanumerical order. Also note
    that slicing returns a new ImageCollection, *not* a view into the data.

    ImageCollection can be modified to load images from an arbitrary
    source by specifying a combination of `load_pattern` and
    `load_func`. For an ImageCollection ``ic`` built with a custom
    `load_func`, ``ic[5]`` calls ``load_func(ic.files[5])`` (plus any
    `load_func_kwargs`) to load the image.

    Imagine, for example, an ImageCollection that loads every third
    frame from a video file::

      video_file = 'no_time_for_that_tiny.gif'

      def vidread_step(f, step):
          vid = imageio.get_reader(f)
          seq = [v for v in vid.iter_data()]
          return seq[::step]

      ic = ImageCollection(video_file, load_func=vidread_step, step=3)

      ic  # is an ImageCollection object of length 1 because there is 1 file

      x = ic[0]  # calls vidread_step(video_file, step=3)
      x[5]  # is the sixth element of a list of length 8 (24 / 3)

    Another use of ``load_func`` would be to convert all images to ``uint8``::

      def imread_convert(f):
          return imread(f).astype(np.uint8)

      ic = ImageCollection('/tmp/*.png', load_func=imread_convert)

    Examples
    --------
    >>> import skimage.io as io
    >>> from skimage import data_dir

    >>> coll = io.ImageCollection(data_dir + '/chess*.png')
    >>> len(coll)
    2
    >>> coll[0].shape
    (200, 200)

    >>> ic = io.ImageCollection(['/tmp/work/*.png', '/tmp/other/*.jpg'])
    """

    def __init__(self, load_pattern, conserve_memory=True, load_func=None,
                 **load_func_kwargs):
        """Load and manage a collection of images."""
        self._files = []
        if _is_multipattern(load_pattern):
            if isinstance(load_pattern, str):
                load_pattern = load_pattern.split(os.pathsep)
            for pattern in load_pattern:
                self._files.extend(glob(pattern))
        elif isinstance(load_pattern, str):
            self._files.extend(glob(load_pattern))
        else:
            raise TypeError('Invalid pattern as input.')
        self._files = sorted(self._files, key=alphanumeric_key)

        if load_func is None:
            # Default reader: index every frame of every file so that
            # multi-frame images contribute one entry per frame.
            from ._io import imread
            self.load_func = imread
            self._numframes = self._find_images()
        else:
            # Custom reader: one entry per file, no frame index.
            self.load_func = load_func
            self._numframes = len(self._files)
            self._frame_index = None

        # One shared slot when conserving memory, one slot per image
        # otherwise.
        memory_slots = 1 if conserve_memory else self._numframes

        self._conserve_memory = conserve_memory
        self._cached = None  # number of the image held in the single slot

        self.load_func_kwargs = load_func_kwargs
        self.data = np.empty(memory_slots, dtype=object)

    @property
    def files(self):
        return self._files

    @property
    def conserve_memory(self):
        return self._conserve_memory

    def _find_images(self):
        """Build the ``(filename, frame number)`` index of the collection.

        Files that PIL cannot open are skipped silently.

        Returns
        -------
        num : int
            Total number of frames found; the index itself is stored in
            ``self._frame_index``.
        """
        index = []
        for fname in self._files:
            if fname.lower().endswith(('.tiff', '.tif')):
                with open(fname, 'rb') as f:
                    img = TiffFile(f)
                    index += [(fname, i) for i in range(len(img.pages))]
                continue

            try:
                im = Image.open(fname)
            except OSError:
                continue
            try:
                try:
                    im.seek(0)
                except OSError:
                    continue
                # Step through the frames until PIL signals the end.
                i = 0
                while True:
                    try:
                        im.seek(i)
                    except EOFError:
                        break
                    index.append((fname, i))
                    i += 1
            finally:
                # Release the file handle on every exit path, including
                # unreadable files and unexpected errors while seeking.
                if hasattr(im, 'fp') and im.fp:
                    im.fp.close()
        self._frame_index = index
        return len(index)

    def _load_frame(self, n):
        """Read image number `n` (already validated) with ``load_func``."""
        # Work on a copy: the per-frame ``img_num`` must not leak into the
        # keyword arguments shared by this collection and its slices.
        kwargs = dict(self.load_func_kwargs)
        if not self._frame_index:
            return self.load_func(self.files[n], **kwargs)

        fname, img_num = self._frame_index[n]
        if img_num is not None:
            kwargs['img_num'] = img_num
        try:
            return self.load_func(fname, **kwargs)
        except TypeError as e:
            # Account for functions that do not accept an img_num kwarg
            if "unexpected keyword argument 'img_num'" not in str(e):
                raise
            kwargs.pop('img_num', None)
            return self.load_func(fname, **kwargs)

    def __getitem__(self, n):
        """Return selected image(s) in the collection.

        Loading is done on demand.

        Parameters
        ----------
        n : int or slice
            The image number to be returned, or a slice selecting the images
            and ordering to be returned in a new ImageCollection.

        Returns
        -------
        img : ndarray or ImageCollection.
            The `n`-th image in the collection, or a new ImageCollection with
            the selected images.

        Raises
        ------
        TypeError
            If `n` is neither an integer-like object nor a slice.
        IndexError
            If `n` is an integer outside the collection.
        """
        if hasattr(n, '__index__'):
            n = n.__index__()

        if type(n) not in [int, slice]:
            raise TypeError('slicing must be with an int or slice object')

        if type(n) is int:
            n = self._check_imgnum(n)  # check that n is within range
            idx = n % len(self.data)

            # Load when the single slot holds another image, or when the
            # slot for this image has not been filled yet.
            if ((self.conserve_memory and n != self._cached) or
                    (self.data[idx] is None)):
                self.data[idx] = self._load_frame(n)
                self._cached = n

            return self.data[idx]

        # A slice object was provided, so create a new ImageCollection
        # object. Any loaded image data in the original ImageCollection
        # will be copied by reference to the new object.  Image data
        # loaded after this creation is not linked.
        fidx = range(self._numframes)[n]
        new_ic = copy(self)

        if self._frame_index:
            new_ic._files = [self._frame_index[i][0] for i in fidx]
            new_ic._frame_index = [self._frame_index[i] for i in fidx]
        else:
            new_ic._files = [self._files[i] for i in fidx]

        new_ic._numframes = len(fidx)

        if self.conserve_memory:
            if self._cached in fidx:
                # Keep the cached image, renumbered for the new collection.
                new_ic._cached = fidx.index(self._cached)
                new_ic.data = np.copy(self.data)
            else:
                new_ic.data = np.empty(1, dtype=object)
        else:
            new_ic.data = self.data[fidx]
        return new_ic

    def _check_imgnum(self, n):
        """Check that the given image number is valid."""
        num = self._numframes
        if -num <= n < num:
            n = n % num
        else:
            raise IndexError("There are only %s images in the collection"
                             % num)
        return n

    def __iter__(self):
        """Iterate over the images."""
        for i in range(len(self)):
            yield self[i]

    def __len__(self):
        """Number of images in collection."""
        return self._numframes

    def __str__(self):
        return str(self.files)

    def reload(self, n=None):
        """Clear the image cache.

        Parameters
        ----------
        n : None or int
            Clear the cache for this image only. By default, the
            entire cache is erased.

        Raises
        ------
        IndexError
            If `n` is given and lies outside the collection.
        """
        if n is None:
            self.data = np.empty_like(self.data)
            return

        n = self._check_imgnum(n)
        # With conserve_memory the single slot is shared by all images, so
        # it is cleared only when it currently holds image `n`.
        if not self.conserve_memory or n == self._cached:
            self.data[n % len(self.data)] = None

    def concatenate(self):
        """Concatenate all images in the collection into an array.

        Returns
        -------
        ar : np.ndarray
            An array having one more dimension than the images in `self`.

        See Also
        --------
        concatenate_images

        Raises
        ------
        ValueError
            If images in the `ImageCollection` don't have identical shapes.
        """
        return concatenate_images(self)
def imread_collection_wrapper(imread):
    """Create an ``imread_collection`` function bound to a given reader.

    Parameters
    ----------
    imread : callable
        Function used as ``load_func`` of every `ImageCollection` built by
        the returned function.

    Returns
    -------
    imread_collection : callable
        Function taking ``(load_pattern, conserve_memory=True)`` and
        returning an `ImageCollection`.
    """
    def imread_collection(load_pattern, conserve_memory=True):
        """Return an `ImageCollection` from files matching the given pattern.

        Note that files are always stored in alphanumerical order. Also note
        that slicing returns a new ImageCollection, *not* a view into the
        data.

        See `skimage.io.ImageCollection` for details.

        Parameters
        ----------
        load_pattern : str or list
            Pattern glob or filenames to load. The path can be absolute or
            relative.  Multiple patterns in one string should be separated
            by ``os.pathsep`` (a colon on POSIX systems), e.g.
            ``/tmp/work/*.png:/tmp/other/*.jpg``.
        conserve_memory : bool, optional
            If True, never keep more than one image in memory at a specific
            time.  Otherwise, images will be cached once they are loaded.

        Returns
        -------
        ic : ImageCollection
            Collection that loads its images with the wrapped reader.
        """
        collection = ImageCollection(
            load_pattern,
            conserve_memory=conserve_memory,
            load_func=imread,
        )
        return collection

    return imread_collection
class MultiImage(ImageCollection):
    """A class containing all frames from multi-frame images.

    Parameters
    ----------
    filename : str or list of str
        Pattern glob or filenames to load. The path can be absolute or
        relative.
    conserve_memory : bool, optional
        Whether to conserve memory by only caching a single frame. Default is
        True.
    dtype : optional
        Accepted by the constructor but not used by it.

    Other parameters
    ----------------
    **imread_kwargs : dict
        Extra keyword arguments handed to `ImageCollection` together with
        ``imread`` as the loading function.

    Notes
    -----
    If ``conserve_memory=True`` the memory footprint can be reduced, however
    the performance can be affected because frames have to be read from file
    more often.

    The last accessed frame is cached, all other frames will have to be read
    from file.

    The current implementation makes use of ``tifffile`` for Tiff files and
    PIL otherwise.

    Examples
    --------
    >>> from skimage import data_dir

    >>> img = MultiImage(data_dir + '/multipage.tif') # doctest: +SKIP
    >>> len(img) # doctest: +SKIP
    2
    >>> for frame in img: # doctest: +SKIP
    ...     print(frame.shape) # doctest: +SKIP
    (15, 10)
    (15, 10)
    """

    def __init__(self, filename, conserve_memory=True, dtype=None,
                 **imread_kwargs):
        """Load a multi-img."""
        from ._io import imread

        # Remember the pattern exactly as it was passed in; it is exposed
        # unchanged through the ``filename`` property.
        self._filename = filename
        super().__init__(
            filename,
            conserve_memory,
            load_func=imread,
            **imread_kwargs,
        )

    @property
    def filename(self):
        """The `filename` argument given at construction time."""
        return self._filename