CofeehousePy/deps/scikit-image/skimage/graph/heap.pyx

757 lines
25 KiB
Cython

# -*- python -*-
"""Cython implementation of a binary min heap.
Original author: Almar Klein
Modified by: Zachary Pincus
License: BSD
Copyright 2009 Almar Klein
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
# cython specific imports
import cython
from libc.stdlib cimport malloc, free
cdef extern from "pyport.h":
double Py_HUGE_VAL
cdef VALUE_T inf = Py_HUGE_VAL
# this is handy
cdef inline INDEX_T index_min(INDEX_T a, INDEX_T b) nogil:
return a if a <= b else b
cdef class BinaryHeap:
"""BinaryHeap(initial_capacity=128)
A binary heap class that can store values and an integer reference.
A binary heap is an object to store values in, optimized in such a way
that the minimum (or maximum, but a minimum in this implementation)
value can be found in O(log2(N)) time. In this implementation, a reference
value (a single integer) can also be stored with each value.
Use the methods push() and pop() to put in or extract values.
In C, use the corresponding push_fast() and pop_fast().
Parameters
----------
initial_capacity : int
Estimate of the size of the heap, if known in advance. (In any case,
the heap will dynamically grow and shrink as required, though never
below the `initial_capacity`.)
Attributes
----------
count : int
The number of values in the heap
levels : int
The number of levels in the binary heap (see Notes below). The values
are stored in the last level, so 2**levels is the capacity of the
heap before another resize is required.
min_levels : int
The minimum number of levels in the heap (relates to the
`initial_capacity` parameter.)
Notes
-----
This implementation stores the binary heap in an array twice as long as
the number of elements in the heap. The array is structured in levels,
starting at level 0 with a single value, doubling the amount of values in
each level. The final level contains the actual values, the level before
it contains the pairwise minimum values. The level before that contains
the pairwise minimum values of that level, etc. Take a look at this
illustration:
level: 0 11 2222 33333333 4444444444444444
index: 0 12 3456 78901234 5678901234567890
1 2 3
The actual values are stored in level 4. The minimum value of position 15
and 16 is stored in position 7. min(17,18)->8, min(7,8)->3, min(3,4)->1.
When adding a value, only the path to the top has to be updated, which
takesO(log2(N)) time.
The advantage of this implementation relative to more common
implementations that swap values when pushing to the array is that data
only needs to be swapped once when an element is removed. This means that
keeping an array of references along with the values is very inexpensive.
Th disadvantage is that if you pop the minimum value, the tree has to be
traced from top to bottom and back. So if you only want values and no
references, this implementation will probably be slower. If you need
references (and maybe cross references to be kept up to date) this
implementation will be faster.
"""
## Basic methods
# The following lines are always "inlined", but documented here for
# clarity:
#
# To calculate the start index of a certain level:
# 2**l-1 # LevelStart
# Note that in inner loops, this may also be represented as (1<<l)-1,
# because code of the form x**y goes via the python pow operations and
# can thus be a bit slower.
#
# To calculate the corresponding ABSOLUTE index at the next level:
# i*2+1 # CalcNextAbs
#
# To calculate the corresponding ABSOLUTE index at the previous level:
# (i-1)/2 # CalcPrevAbs
#
# To calculate the capacity at a certain level:
# 2**l
def __cinit__(self, INDEX_T initial_capacity=128, *args, **kws):
# calc levels from the default capacity
cdef LEVELS_T levels = 0
while 2**levels < initial_capacity:
levels += 1
# set levels
self.min_levels = self.levels = levels
# we start with 0 values
self.count = 0
# allocate arrays
cdef INDEX_T number = 2**self.levels
self._values = <VALUE_T *>malloc(2 * number * sizeof(VALUE_T))
self._references = <REFERENCE_T *>malloc(number * sizeof(REFERENCE_T))
def __init__(self, INDEX_T initial_capacity=128):
"""__init__(initial_capacity=128)
Class constructor.
Takes an optional parameter 'initial_capacity' so that
if the required heap capacity is known or can be estimated in advance,
there will need to be fewer resize operations on the heap.
"""
if self._values is NULL or self._references is NULL:
raise MemoryError()
self.reset()
def reset(self):
"""reset()
Reset the heap to default, empty state.
"""
cdef INDEX_T number = 2**self.levels
cdef INDEX_T i
cdef VALUE_T *values = self._values
for i in range(number * 2):
values[i] = inf
def __dealloc__(self):
free(self._values)
free(self._references)
def __str__(self):
s = ''
for level in range(1, self.levels + 1):
i0 = 2**level - 1 # LevelStart
s += 'level %i: ' % level
for i in range(i0, i0 + 2**level):
s += '%g, ' % self._values[i]
s = s[:-1] + '\n'
return s
## C Maintenance methods
cdef void _add_or_remove_level(self, LEVELS_T add_or_remove) nogil:
# init indexing ints
cdef INDEX_T i, i1, i2, n
# new amount of levels
cdef LEVELS_T new_levels = self.levels + add_or_remove
# allocate new arrays
cdef INDEX_T number = 2**new_levels
cdef VALUE_T *values
cdef REFERENCE_T *references
values = <VALUE_T *>malloc(number * 2 * sizeof(VALUE_T))
references = <REFERENCE_T *>malloc(number * sizeof(REFERENCE_T))
if values is NULL or references is NULL:
free(values)
free(references)
with gil:
raise MemoryError()
# init arrays
for i in range(number * 2):
values[i] = inf
for i in range(number):
references[i] = -1
# copy data
cdef VALUE_T *old_values = self._values
cdef REFERENCE_T *old_references = self._references
if self.count:
i1 = 2**new_levels - 1 # LevelStart
i2 = 2**self.levels - 1 # LevelStart
n = index_min(2**new_levels, 2**self.levels)
for i in range(n):
values[i1+i] = old_values[i2+i]
for i in range(n):
references[i] = old_references[i]
# make current
free(self._values)
free(self._references)
self._values = values
self._references = references
# we need a full update
self.levels = new_levels
self._update()
cdef void _update(self) nogil:
"""Update the full tree from the bottom up.
This should be done after resizing.
"""
# shorter name for values
cdef VALUE_T *values = self._values
# Note that i represents an absolute index here
cdef INDEX_T i0, i, ii, n
cdef LEVELS_T level
# track tree
for level in range(self.levels, 1, -1):
i0 = (1 << level) - 1 # 2**level-1 = LevelStart
n = i0 + 1 # 2**level
for i in range(i0, i0+n, 2):
ii = (i-1) // 2 # CalcPrevAbs
if values[i] < values[i+1]:
values[ii] = values[i]
else:
values[ii] = values[i+1]
cdef void _update_one(self, INDEX_T i) nogil:
"""Update the tree for one value."""
# shorter name for values
cdef VALUE_T *values = self._values
# make index uneven
if i % 2 == 0:
i -= 1
# track tree
cdef INDEX_T ii
cdef LEVELS_T level
for level in range(self.levels, 1, -1):
ii = (i-1) // 2 # CalcPrevAbs
# test
if values[i] < values[i+1]:
values[ii] = values[i]
else:
values[ii] = values[i+1]
# next
if ii % 2:
i = ii
else:
i = ii - 1
cdef void _remove(self, INDEX_T i1) nogil:
"""Remove a value from the heap. By index."""
cdef LEVELS_T levels = self.levels
cdef INDEX_T count = self.count
# get indices
cdef INDEX_T i0 = (1 << levels) - 1 # 2**self.levels - 1 # LevelStart
cdef INDEX_T i2 = i0 + count - 1
# get relative indices
cdef INDEX_T r1 = i1 - i0
cdef INDEX_T r2 = count - 1
cdef VALUE_T *values = self._values
cdef REFERENCE_T *references = self._references
# swap with last
values[i1] = values[i2]
references[r1] = references[r2]
# make last Null
values[i2] = inf
# update
self.count -= 1
count -= 1
if (levels > self.min_levels) and (count < (1 << (levels-2))):
self._add_or_remove_level(-1)
else:
self._update_one(i1)
self._update_one(i2)
## C Public methods
cdef INDEX_T push_fast(self, VALUE_T value, REFERENCE_T reference) nogil:
"""The c-method for fast pushing.
Returns the index relative to the start of the last level in the heap.
"""
# We need to resize if currently it just fits.
cdef LEVELS_T levels = self.levels
cdef INDEX_T count = self.count
if count >= (1 << levels): # 2**self.levels:
self._add_or_remove_level(1)
levels += 1
# insert new value
cdef INDEX_T i = ((1 << levels) - 1) + count # LevelStart + n
self._values[i] = value
self._references[count] = reference
# update
self.count += 1
self._update_one(i)
# return
return count
cdef VALUE_T pop_fast(self) nogil:
"""The c-method for fast popping.
Returns the minimum value. The reference is put in self._popped_ref.
"""
# shorter name for values
cdef VALUE_T *values = self._values
# init index. start at 1 because we start in level 1
cdef LEVELS_T level
cdef INDEX_T i = 1
cdef LEVELS_T levels = self.levels
# search tree (using absolute indices)
for level in range(1, levels):
if values[i] <= values[i+1]:
i = i * 2 + 1 # CalcNextAbs
else:
i = (i+1) * 2 + 1 # CalcNextAbs
# select best one in last level
if values[i] <= values[i+1]:
i = i
else:
i += 1
# get values
cdef INDEX_T ir = i - ((1 << levels) - 1) # (2**self.levels-1)
# LevelStart
cdef VALUE_T value = values[i]
self._popped_ref = self._references[ir]
# remove it
if self.count:
self._remove(i)
# return
return value
## Python Public methods (that do not need to be VERY fast)
def push(self, VALUE_T value, REFERENCE_T reference=-1):
"""push(value, reference=-1)
Append a value to the heap, with optional reference.
Parameters
----------
value : float
Value to push onto the heap
reference : int, optional
Reference to associate with the given value.
"""
self.push_fast(value, reference)
def min_val(self):
"""min_val()
Get the minimum value on the heap.
Returns only the value, and does not remove it from the heap.
"""
# shorter name for values
cdef VALUE_T *values = self._values
# select best one in last level
if values[1] < values[2]:
return values[1]
else:
return values[2]
def values(self):
"""values()
Get the values in the heap as a list.
"""
cdef INDEX_T i0 = 2**self.levels - 1 # LevelStart
return [self._values[i] for i in range(i0, i0+self.count)]
def references(self):
"""references()
Get the references in the heap as a list.
"""
return [self._references[i] for i in range(self.count)]
def pop(self):
"""pop()
Get the minimum value and remove it from the list.
Returns
-------
value : float
reference : int
If no reference was provided, -1 is returned here.
Raises
------
IndexError
On attempt to pop from an empty heap
"""
if self.count == 0:
raise IndexError('pop from an empty heap')
value = self.pop_fast()
ref = self._popped_ref
return value, ref
cdef class FastUpdateBinaryHeap(BinaryHeap):
"""FastUpdateBinaryHeap(initial_capacity=128, max_reference=None)
Binary heap that allows the value of a reference to be updated quickly.
This heap class keeps cross-references so that the value associated with a
given reference can be quickly queried (O(1) time) or updated (O(log2(N))
time). This is ideal for pathfinding algorithms that implement some
variant of Dijkstra's algorithm.
Parameters
----------
initial_capacity : int
Estimate of the size of the heap, if known in advance. (In any case,
the heap will dynamically grow and shrink as required, though never
below the `initial_capacity`.)
max_reference : int, optional
Largest reference value that might be pushed to the heap. (Pushing a
larger value will result in an error.) If no value is provided,
`1-initial_capacity` will be used. For the cross-reference index to
work, all references must be in the range [0, max_reference];
references pushed outside of that range will not be added to the heap.
The cross-references are kept as a 1-d array of length
`max_reference+1', so memory use of this heap is effectively
O(max_reference)
Attributes
----------
count : int
The number of values in the heap
levels : int
The number of levels in the binary heap (see Notes below). The values
are stored in the last level, so 2**levels is the capacity of the
heap before another resize is required.
min_levels : int
The minimum number of levels in the heap (relates to the
`initial_capacity` parameter.)
max_reference : int
The provided or calculated maximum allowed reference value.
Notes
-----
The cross-references map data[reference]->internalindex, such that the
value corresponding to a given reference can be found efficiently. This
can be queried with the value_of() method.
A special method, push_if_lower() is provided that will update the heap if
the given reference is not in the heap, or if it is and the provided value
is lower than the current value in the heap. This is again useful for
pathfinding algorithms.
"""
def __cinit__(self, INDEX_T initial_capacity=128, max_reference=None):
if max_reference is None:
max_reference = initial_capacity - 1
self.max_reference = max_reference
self._crossref = <INDEX_T *>malloc((self.max_reference + 1) *
sizeof(INDEX_T))
def __init__(self, INDEX_T initial_capacity=128, max_reference=None):
"""__init__(initial_capacity=128, max_reference=None)
Class constructor.
"""
if self._crossref is NULL:
raise MemoryError()
# below will call self.reset
BinaryHeap.__init__(self, initial_capacity)
def __dealloc__(self):
free(self._crossref)
def reset(self):
"""reset()
Reset the heap to default, empty state.
"""
BinaryHeap.reset(self)
# set default values of crossrefs
cdef INDEX_T i
for i in range(self.max_reference + 1):
self._crossref[i] = -1
cdef void _remove(self, INDEX_T i1) nogil:
"""Remove a value from the heap. By index."""
cdef LEVELS_T levels = self.levels
cdef INDEX_T count = self.count
# get indices
cdef INDEX_T i0 = (1 << levels) - 1 # 2**self.levels - 1 # LevelStart
cdef INDEX_T i2 = i0 + count - 1
# get relative indices
cdef INDEX_T r1 = i1 - i0
cdef INDEX_T r2 = count - 1
cdef VALUE_T *values = self._values
cdef REFERENCE_T *references = self._references
cdef INDEX_T *crossref = self._crossref
# update cross reference
crossref[references[r2]] = r1
crossref[references[r1]] = -1 # disable removed item
# swap with last
values[i1] = values[i2]
references[r1] = references[r2]
# make last Null
values[i2] = inf
# update
self.count -= 1
count -= 1
if (levels > self.min_levels) & (count < (1 << (levels-2))):
self._add_or_remove_level(-1)
else:
self._update_one(i1)
self._update_one(i2)
cdef INDEX_T push_fast(self, VALUE_T value, REFERENCE_T reference) nogil:
"""The c method for fast pushing.
If the reference is already present, will update its value, otherwise
will append it.
If -1 is returned, the provided reference was out-of-bounds and no
value was pushed to the heap.
"""
if not (0 <= reference <= self.max_reference):
return -1
# init variable to store the index-in-the-heap
cdef INDEX_T i
# Reference is the index in the array where MCP is applied to.
# Find the index-in-the-heap using the crossref array.
cdef INDEX_T ir = self._crossref[reference]
if ir != -1:
# update
i = (1 << self.levels) - 1 + ir
self._values[i] = value
self._update_one(i)
return ir
# if not updated: append normally and store reference
ir = BinaryHeap.push_fast(self, value, reference)
self._crossref[reference] = ir
return ir
cdef INDEX_T push_if_lower_fast(self, VALUE_T value,
REFERENCE_T reference) nogil:
"""If the reference is already present, will update its value ONLY if
the new value is lower than the old one. If the reference is not
present, this append it. If a value was appended, self._pushed is
set to 1.
If -1 is returned, the provided reference was out-of-bounds and no
value was pushed to the heap.
"""
if not (0 <= reference <= self.max_reference):
return -1
# init variable to store the index-in-the-heap
cdef INDEX_T i
# Reference is the index in the array where MCP is applied to.
# Find the index-in-the-heap using the crossref array.
cdef INDEX_T ir = self._crossref[reference]
cdef VALUE_T *values = self._values
self._pushed = 1
if ir != -1:
# update
i = (1 << self.levels) - 1 + ir
if values[i] > value:
values[i] = value
self._update_one(i)
else:
self._pushed = 0
return ir
# if not updated: append normally and store reference
ir = BinaryHeap.push_fast(self, value, reference)
self._crossref[reference] = ir
return ir
cdef VALUE_T value_of_fast(self, REFERENCE_T reference):
"""Return the value corresponding to the given reference.
If inf is returned, the reference may be invalid: check the
_invaild_ref field in this case.
"""
if not (0 <= reference <= self.max_reference):
self._invalid_ref = 1
return inf
# init variable to store the index-in-the-heap
cdef INDEX_T i
# Reference is the index in the array where MCP is applied to.
# Find the index-in-the-heap using the crossref array.
cdef INDEX_T ir = self._crossref[reference]
self._invalid_ref = 0
if ir == -1:
self._invalid_ref = 1
return inf
i = (1 << self.levels) - 1 + ir
return self._values[i]
def push(self, double value, int reference):
"""push(value, reference)
Append/update a value in the heap.
Parameters
----------
value : float
reference : int
If the reference is already present in the array, the value for
that reference will be updated, otherwise the (value, reference)
pair will be added to the heap.
Raises
------
ValueError
On pushing a reference outside the range [0, max_reference].
"""
if self.push_fast(value, reference) == -1:
raise ValueError('reference outside of range [0, max_reference]')
def push_if_lower(self, double value, int reference):
"""push_if_lower(value, reference)
Append/update a value in the heap if the extant value is lower.
If the reference is already in the heap, update only of the new value
is lower than the current one. If the reference is not present, the
value will always be pushed to the heap.
Parameters
----------
value : float
reference : int
If the reference is already present in the array, the value for
that reference will be updated, otherwise the (value, reference)
pair will be added to the heap.
Returns
-------
pushed : bool
True if an append/update occurred, False if otherwise.
Raises
------
ValueError
On pushing a reference outside the range [0, max_reference].
"""
if self.push_if_lower_fast(value, reference) == -1:
raise ValueError('reference outside of range [0, max_reference]')
return self._pushed == 1
def value_of(self, int reference):
"""value_of(reference)
Get the value corresponding to a given reference.
Parameters
----------
reference : int
A reference already pushed to the heap.
Returns
-------
value : float
Raises
------
ValueError
On querying a reference outside the range [0, max_reference], or
not already pushed to the heap.
"""
value = self.value_of_fast(reference)
if self._invalid_ref:
raise ValueError('invalid reference')
return value
def cross_references(self):
"""Get the cross references in the heap as a list."""
return [self._crossref[i] for i in range(self.max_reference + 1)]