# -*- python -*- """Cython implementation of a binary min heap. Original author: Almar Klein Modified by: Zachary Pincus License: BSD Copyright 2009 Almar Klein Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """ # cython specific imports import cython from libc.stdlib cimport malloc, free cdef extern from "pyport.h": double Py_HUGE_VAL cdef VALUE_T inf = Py_HUGE_VAL # this is handy cdef inline INDEX_T index_min(INDEX_T a, INDEX_T b) nogil: return a if a <= b else b cdef class BinaryHeap: """BinaryHeap(initial_capacity=128) A binary heap class that can store values and an integer reference. A binary heap is an object to store values in, optimized in such a way that the minimum (or maximum, but a minimum in this implementation) value can be found in O(log2(N)) time. In this implementation, a reference value (a single integer) can also be stored with each value. Use the methods push() and pop() to put in or extract values. In C, use the corresponding push_fast() and pop_fast(). Parameters ---------- initial_capacity : int Estimate of the size of the heap, if known in advance. (In any case, the heap will dynamically grow and shrink as required, though never below the `initial_capacity`.) Attributes ---------- count : int The number of values in the heap levels : int The number of levels in the binary heap (see Notes below). The values are stored in the last level, so 2**levels is the capacity of the heap before another resize is required. min_levels : int The minimum number of levels in the heap (relates to the `initial_capacity` parameter.) Notes ----- This implementation stores the binary heap in an array twice as long as the number of elements in the heap. The array is structured in levels, starting at level 0 with a single value, doubling the amount of values in each level. The final level contains the actual values, the level before it contains the pairwise minimum values. The level before that contains the pairwise minimum values of that level, etc. Take a look at this illustration: level: 0 11 2222 33333333 4444444444444444 index: 0 12 3456 78901234 5678901234567890 1 2 3 The actual values are stored in level 4. The minimum value of position 15 and 16 is stored in position 7. min(17,18)->8, min(7,8)->3, min(3,4)->1. When adding a value, only the path to the top has to be updated, which takesO(log2(N)) time. The advantage of this implementation relative to more common implementations that swap values when pushing to the array is that data only needs to be swapped once when an element is removed. This means that keeping an array of references along with the values is very inexpensive. Th disadvantage is that if you pop the minimum value, the tree has to be traced from top to bottom and back. So if you only want values and no references, this implementation will probably be slower. If you need references (and maybe cross references to be kept up to date) this implementation will be faster. """ ## Basic methods # The following lines are always "inlined", but documented here for # clarity: # # To calculate the start index of a certain level: # 2**l-1 # LevelStart # Note that in inner loops, this may also be represented as (1<malloc(2 * number * sizeof(VALUE_T)) self._references = malloc(number * sizeof(REFERENCE_T)) def __init__(self, INDEX_T initial_capacity=128): """__init__(initial_capacity=128) Class constructor. Takes an optional parameter 'initial_capacity' so that if the required heap capacity is known or can be estimated in advance, there will need to be fewer resize operations on the heap. """ if self._values is NULL or self._references is NULL: raise MemoryError() self.reset() def reset(self): """reset() Reset the heap to default, empty state. """ cdef INDEX_T number = 2**self.levels cdef INDEX_T i cdef VALUE_T *values = self._values for i in range(number * 2): values[i] = inf def __dealloc__(self): free(self._values) free(self._references) def __str__(self): s = '' for level in range(1, self.levels + 1): i0 = 2**level - 1 # LevelStart s += 'level %i: ' % level for i in range(i0, i0 + 2**level): s += '%g, ' % self._values[i] s = s[:-1] + '\n' return s ## C Maintenance methods cdef void _add_or_remove_level(self, LEVELS_T add_or_remove) nogil: # init indexing ints cdef INDEX_T i, i1, i2, n # new amount of levels cdef LEVELS_T new_levels = self.levels + add_or_remove # allocate new arrays cdef INDEX_T number = 2**new_levels cdef VALUE_T *values cdef REFERENCE_T *references values = malloc(number * 2 * sizeof(VALUE_T)) references = malloc(number * sizeof(REFERENCE_T)) if values is NULL or references is NULL: free(values) free(references) with gil: raise MemoryError() # init arrays for i in range(number * 2): values[i] = inf for i in range(number): references[i] = -1 # copy data cdef VALUE_T *old_values = self._values cdef REFERENCE_T *old_references = self._references if self.count: i1 = 2**new_levels - 1 # LevelStart i2 = 2**self.levels - 1 # LevelStart n = index_min(2**new_levels, 2**self.levels) for i in range(n): values[i1+i] = old_values[i2+i] for i in range(n): references[i] = old_references[i] # make current free(self._values) free(self._references) self._values = values self._references = references # we need a full update self.levels = new_levels self._update() cdef void _update(self) nogil: """Update the full tree from the bottom up. This should be done after resizing. """ # shorter name for values cdef VALUE_T *values = self._values # Note that i represents an absolute index here cdef INDEX_T i0, i, ii, n cdef LEVELS_T level # track tree for level in range(self.levels, 1, -1): i0 = (1 << level) - 1 # 2**level-1 = LevelStart n = i0 + 1 # 2**level for i in range(i0, i0+n, 2): ii = (i-1) // 2 # CalcPrevAbs if values[i] < values[i+1]: values[ii] = values[i] else: values[ii] = values[i+1] cdef void _update_one(self, INDEX_T i) nogil: """Update the tree for one value.""" # shorter name for values cdef VALUE_T *values = self._values # make index uneven if i % 2 == 0: i -= 1 # track tree cdef INDEX_T ii cdef LEVELS_T level for level in range(self.levels, 1, -1): ii = (i-1) // 2 # CalcPrevAbs # test if values[i] < values[i+1]: values[ii] = values[i] else: values[ii] = values[i+1] # next if ii % 2: i = ii else: i = ii - 1 cdef void _remove(self, INDEX_T i1) nogil: """Remove a value from the heap. By index.""" cdef LEVELS_T levels = self.levels cdef INDEX_T count = self.count # get indices cdef INDEX_T i0 = (1 << levels) - 1 # 2**self.levels - 1 # LevelStart cdef INDEX_T i2 = i0 + count - 1 # get relative indices cdef INDEX_T r1 = i1 - i0 cdef INDEX_T r2 = count - 1 cdef VALUE_T *values = self._values cdef REFERENCE_T *references = self._references # swap with last values[i1] = values[i2] references[r1] = references[r2] # make last Null values[i2] = inf # update self.count -= 1 count -= 1 if (levels > self.min_levels) and (count < (1 << (levels-2))): self._add_or_remove_level(-1) else: self._update_one(i1) self._update_one(i2) ## C Public methods cdef INDEX_T push_fast(self, VALUE_T value, REFERENCE_T reference) nogil: """The c-method for fast pushing. Returns the index relative to the start of the last level in the heap. """ # We need to resize if currently it just fits. cdef LEVELS_T levels = self.levels cdef INDEX_T count = self.count if count >= (1 << levels): # 2**self.levels: self._add_or_remove_level(1) levels += 1 # insert new value cdef INDEX_T i = ((1 << levels) - 1) + count # LevelStart + n self._values[i] = value self._references[count] = reference # update self.count += 1 self._update_one(i) # return return count cdef VALUE_T pop_fast(self) nogil: """The c-method for fast popping. Returns the minimum value. The reference is put in self._popped_ref. """ # shorter name for values cdef VALUE_T *values = self._values # init index. start at 1 because we start in level 1 cdef LEVELS_T level cdef INDEX_T i = 1 cdef LEVELS_T levels = self.levels # search tree (using absolute indices) for level in range(1, levels): if values[i] <= values[i+1]: i = i * 2 + 1 # CalcNextAbs else: i = (i+1) * 2 + 1 # CalcNextAbs # select best one in last level if values[i] <= values[i+1]: i = i else: i += 1 # get values cdef INDEX_T ir = i - ((1 << levels) - 1) # (2**self.levels-1) # LevelStart cdef VALUE_T value = values[i] self._popped_ref = self._references[ir] # remove it if self.count: self._remove(i) # return return value ## Python Public methods (that do not need to be VERY fast) def push(self, VALUE_T value, REFERENCE_T reference=-1): """push(value, reference=-1) Append a value to the heap, with optional reference. Parameters ---------- value : float Value to push onto the heap reference : int, optional Reference to associate with the given value. """ self.push_fast(value, reference) def min_val(self): """min_val() Get the minimum value on the heap. Returns only the value, and does not remove it from the heap. """ # shorter name for values cdef VALUE_T *values = self._values # select best one in last level if values[1] < values[2]: return values[1] else: return values[2] def values(self): """values() Get the values in the heap as a list. """ cdef INDEX_T i0 = 2**self.levels - 1 # LevelStart return [self._values[i] for i in range(i0, i0+self.count)] def references(self): """references() Get the references in the heap as a list. """ return [self._references[i] for i in range(self.count)] def pop(self): """pop() Get the minimum value and remove it from the list. Returns ------- value : float reference : int If no reference was provided, -1 is returned here. Raises ------ IndexError On attempt to pop from an empty heap """ if self.count == 0: raise IndexError('pop from an empty heap') value = self.pop_fast() ref = self._popped_ref return value, ref cdef class FastUpdateBinaryHeap(BinaryHeap): """FastUpdateBinaryHeap(initial_capacity=128, max_reference=None) Binary heap that allows the value of a reference to be updated quickly. This heap class keeps cross-references so that the value associated with a given reference can be quickly queried (O(1) time) or updated (O(log2(N)) time). This is ideal for pathfinding algorithms that implement some variant of Dijkstra's algorithm. Parameters ---------- initial_capacity : int Estimate of the size of the heap, if known in advance. (In any case, the heap will dynamically grow and shrink as required, though never below the `initial_capacity`.) max_reference : int, optional Largest reference value that might be pushed to the heap. (Pushing a larger value will result in an error.) If no value is provided, `1-initial_capacity` will be used. For the cross-reference index to work, all references must be in the range [0, max_reference]; references pushed outside of that range will not be added to the heap. The cross-references are kept as a 1-d array of length `max_reference+1', so memory use of this heap is effectively O(max_reference) Attributes ---------- count : int The number of values in the heap levels : int The number of levels in the binary heap (see Notes below). The values are stored in the last level, so 2**levels is the capacity of the heap before another resize is required. min_levels : int The minimum number of levels in the heap (relates to the `initial_capacity` parameter.) max_reference : int The provided or calculated maximum allowed reference value. Notes ----- The cross-references map data[reference]->internalindex, such that the value corresponding to a given reference can be found efficiently. This can be queried with the value_of() method. A special method, push_if_lower() is provided that will update the heap if the given reference is not in the heap, or if it is and the provided value is lower than the current value in the heap. This is again useful for pathfinding algorithms. """ def __cinit__(self, INDEX_T initial_capacity=128, max_reference=None): if max_reference is None: max_reference = initial_capacity - 1 self.max_reference = max_reference self._crossref = malloc((self.max_reference + 1) * sizeof(INDEX_T)) def __init__(self, INDEX_T initial_capacity=128, max_reference=None): """__init__(initial_capacity=128, max_reference=None) Class constructor. """ if self._crossref is NULL: raise MemoryError() # below will call self.reset BinaryHeap.__init__(self, initial_capacity) def __dealloc__(self): free(self._crossref) def reset(self): """reset() Reset the heap to default, empty state. """ BinaryHeap.reset(self) # set default values of crossrefs cdef INDEX_T i for i in range(self.max_reference + 1): self._crossref[i] = -1 cdef void _remove(self, INDEX_T i1) nogil: """Remove a value from the heap. By index.""" cdef LEVELS_T levels = self.levels cdef INDEX_T count = self.count # get indices cdef INDEX_T i0 = (1 << levels) - 1 # 2**self.levels - 1 # LevelStart cdef INDEX_T i2 = i0 + count - 1 # get relative indices cdef INDEX_T r1 = i1 - i0 cdef INDEX_T r2 = count - 1 cdef VALUE_T *values = self._values cdef REFERENCE_T *references = self._references cdef INDEX_T *crossref = self._crossref # update cross reference crossref[references[r2]] = r1 crossref[references[r1]] = -1 # disable removed item # swap with last values[i1] = values[i2] references[r1] = references[r2] # make last Null values[i2] = inf # update self.count -= 1 count -= 1 if (levels > self.min_levels) & (count < (1 << (levels-2))): self._add_or_remove_level(-1) else: self._update_one(i1) self._update_one(i2) cdef INDEX_T push_fast(self, VALUE_T value, REFERENCE_T reference) nogil: """The c method for fast pushing. If the reference is already present, will update its value, otherwise will append it. If -1 is returned, the provided reference was out-of-bounds and no value was pushed to the heap. """ if not (0 <= reference <= self.max_reference): return -1 # init variable to store the index-in-the-heap cdef INDEX_T i # Reference is the index in the array where MCP is applied to. # Find the index-in-the-heap using the crossref array. cdef INDEX_T ir = self._crossref[reference] if ir != -1: # update i = (1 << self.levels) - 1 + ir self._values[i] = value self._update_one(i) return ir # if not updated: append normally and store reference ir = BinaryHeap.push_fast(self, value, reference) self._crossref[reference] = ir return ir cdef INDEX_T push_if_lower_fast(self, VALUE_T value, REFERENCE_T reference) nogil: """If the reference is already present, will update its value ONLY if the new value is lower than the old one. If the reference is not present, this append it. If a value was appended, self._pushed is set to 1. If -1 is returned, the provided reference was out-of-bounds and no value was pushed to the heap. """ if not (0 <= reference <= self.max_reference): return -1 # init variable to store the index-in-the-heap cdef INDEX_T i # Reference is the index in the array where MCP is applied to. # Find the index-in-the-heap using the crossref array. cdef INDEX_T ir = self._crossref[reference] cdef VALUE_T *values = self._values self._pushed = 1 if ir != -1: # update i = (1 << self.levels) - 1 + ir if values[i] > value: values[i] = value self._update_one(i) else: self._pushed = 0 return ir # if not updated: append normally and store reference ir = BinaryHeap.push_fast(self, value, reference) self._crossref[reference] = ir return ir cdef VALUE_T value_of_fast(self, REFERENCE_T reference): """Return the value corresponding to the given reference. If inf is returned, the reference may be invalid: check the _invaild_ref field in this case. """ if not (0 <= reference <= self.max_reference): self._invalid_ref = 1 return inf # init variable to store the index-in-the-heap cdef INDEX_T i # Reference is the index in the array where MCP is applied to. # Find the index-in-the-heap using the crossref array. cdef INDEX_T ir = self._crossref[reference] self._invalid_ref = 0 if ir == -1: self._invalid_ref = 1 return inf i = (1 << self.levels) - 1 + ir return self._values[i] def push(self, double value, int reference): """push(value, reference) Append/update a value in the heap. Parameters ---------- value : float reference : int If the reference is already present in the array, the value for that reference will be updated, otherwise the (value, reference) pair will be added to the heap. Raises ------ ValueError On pushing a reference outside the range [0, max_reference]. """ if self.push_fast(value, reference) == -1: raise ValueError('reference outside of range [0, max_reference]') def push_if_lower(self, double value, int reference): """push_if_lower(value, reference) Append/update a value in the heap if the extant value is lower. If the reference is already in the heap, update only of the new value is lower than the current one. If the reference is not present, the value will always be pushed to the heap. Parameters ---------- value : float reference : int If the reference is already present in the array, the value for that reference will be updated, otherwise the (value, reference) pair will be added to the heap. Returns ------- pushed : bool True if an append/update occurred, False if otherwise. Raises ------ ValueError On pushing a reference outside the range [0, max_reference]. """ if self.push_if_lower_fast(value, reference) == -1: raise ValueError('reference outside of range [0, max_reference]') return self._pushed == 1 def value_of(self, int reference): """value_of(reference) Get the value corresponding to a given reference. Parameters ---------- reference : int A reference already pushed to the heap. Returns ------- value : float Raises ------ ValueError On querying a reference outside the range [0, max_reference], or not already pushed to the heap. """ value = self.value_of_fast(reference) if self._invalid_ref: raise ValueError('invalid reference') return value def cross_references(self): """Get the cross references in the heap as a list.""" return [self._crossref[i] for i in range(self.max_reference + 1)]