# [Numpy-discussion] List/location of consecutive integers

Pierre GM pgmdevlist@gmail....
Fri May 22 15:15:19 CDT 2009

```On May 22, 2009, at 12:31 PM, Andrea Gavana wrote:

> Hi All,
>
>    this should be a very easy question but I am trying to make a
> script run as fast as possible, so please bear with me if the solution
> is easy and I just overlooked it.
>
> I have a list of integers, like this one:
>
> indices = [1,2,3,4,5,6,7,8,9,255,256,257,258,10001,10002,10003,10004]
>
>> From this list, I would like to find out which values are consecutive
> and store them in another list of tuples (begin_consecutive,
> end_consecutive) or a simple list: as an example, the previous list
> will become:
>
> new_list = [(1, 9), (255, 258), (10001, 10004)]

Josef's and Chris's solutions are pretty neat in this case. I've been
recently working on a more generic case where integers are grouped
depending on some condition (equals, differing by 1 or 2...). A
version in pure Python/numpy, the `Cluster` class is available in
scikits.hydroclimpy.core.tools (hydroclimpy.sourceforge.net).
Otherwise, here's a Cython version of the same class.
Let me know if it works. And I'm not ultra happy with the name, so if
you have any suggestions...

cdef class Brackets:
"""
Groups consecutive data from an array according to a clustering
condition.

A cluster is defined as a group of consecutive values differing
by at most the
increment value.

Missing values are **not** handled: the input sequence must
therefore be free
of missing values.

Parameters
----------
darray : ndarray
Input data array to clusterize.
increment : {float}, optional
Increment between two consecutive values to group.
By default, use a value of 1.
operator : {function}, optional
Comparison operator for the definition of clusters.
By default, use :func:`numpy.less_equal`.

Attributes
----------
inishape
Shape of the argument array (stored for resizing).
inisize
Size of the argument array.
uniques : sequence
List of unique cluster values, as they appear in
chronological order.
slices : sequence
List of the slices corresponding to each cluster of data.
starts : ndarray
Lists of the indices at which the clusters start.
ends : ndarray
Lists of the indices at which the clusters end.
clustered : list
List of clustered data.

Examples
--------
>>> A = [0, 0, 1, 2, 2, 2, 3, 4, 3, 4, 4, 4]
>>> klust = cluster(A,0)
>>> [list(_) for _ in klust.clustered]
[[0, 0], [1], [2, 2, 2], [3], [4], [3], [4, 4, 4]]
>>> klust.uniques
array([0, 1, 2, 3, 4, 3, 4])

>>> x = [ 1.8, 1.3, 2.4, 1.2, 2.5, 3.9, 1. , 3.8, 4.2, 3.3,
...       1.2, 0.2, 0.9, 2.7, 2.4, 2.8, 2.7, 4.7, 4.2, 0.4]
>>> Brackets(x,1).starts
array([ 0,  2,  3,  4,  5,  6,  7, 10, 11, 13, 17, 19])
>>> Brackets(x,1.5).starts
array([ 0,  6,  7, 10, 13, 17, 19])
>>> Brackets(x,2.5).starts
array([ 0,  6,  7, 19])
>>> Brackets(x,2.5,greater).starts
array([ 0,  1,  2,  3,  4,  5,  8,  9, 10,
...    11, 12, 13, 14, 15, 16, 17, 18])
>>> y = [ 0, -1, 0, 0, 0, 1, 1, -1, -1, -1, 1, 1, 0, 0, 0, 0, 1,
1, 0, 0]
>>> Brackets(y,1).starts
array([ 0,  1,  2,  5,  7, 10, 12, 16, 18])

"""

def __init__(Brackets self, object data, double increment=1,
object operator=np.less_equal):
"""
"""
cdef int i, n, ifirst, ilast, test
cdef double last
cdef list starts, ends
#
self.increment = increment
self.data = np.asanyarray(data)
data = np.asarray(data)
#
n = len(data)
starts = []
ends = []
#
last = data[0]
ifirst = 0
ilast = 0
for 1 <= i < n:
test = operator(abs(data[i] - last), increment)
ilast = i
if not test:
starts.append(ifirst)
ends.append(ilast-1)
ifirst = i
last = data[i]
starts.append(ifirst)
ends.append(n-1)
self._starts = starts
self._ends = ends

def __len__(self):
return len(self.starts)

property starts:
#
def __get__(Brackets self):
return np.asarray(self._starts)

property ends:
#
def __get__(Brackets self):
return np.asarray(self._ends)

property sizes:
#
def __get__(Brackets self):
return np.asarray(self._ends) - np.asarray(self._firsts)

property slices:
#
def __get__(Brackets self):
cdef int i
cdef list starts = self._starts, ends = self._ends
cdef list slices = []
for 0 <= i < len(starts):
slices.append(slice(starts[i], ends[i]+1))
return slices

property clustered:
#
def __get__(self):
cdef int i
cdef list starts = self._starts, ends = self._ends
cdef list groups = []
cdef np.ndarray data = self.data
for 0 <= i < len(starts):
groups.append(data[starts[i]:ends[i]+1])
return groups

property uniques:
def __get__(self):
return self.data[self.starts]

def grouped_slices(self):
"""
Returns a dictionary with the unique values of ``self`` as keys,
and a list
of slices for the corresponding values.

--------
Brackets.grouped_limits
that does the same thing
"""
# Define shortcuts
cdef int i, ifirst, n = len(self)
cdef list starts = self._starts, ends = self._ends
cdef np.ndarray data = self.data
# Define new variables
cdef list seen = []
cdef double value
cdef dict grouped = {}
for 0 <= i < n:
ifirst = starts[i]
value = data[ifirst]
if not (value in seen):
grouped[value] = []
seen.append(value)
grouped[value].append(slice(ifirst, ends[i]+1))
return grouped

def grouped_limits(self):
"""
Returns a dictionary with the unique values of ``self`` as keys,
and a list
of tuples (starting index, ending index) for the corresponding
values.

--------
Cluster.grouped_slices
"""
# Define shortcuts
cdef int i, ifirst, n = len(self)
cdef list starts = self.starts, ends = self.ends
cdef np.ndarray data = self.data
# Define new variables
cdef list seen = []
cdef double value
cdef dict grouped = {}
for 0 <= i < n:
ifirst = starts[i]
value = data[ifirst]
if not (value in seen):
grouped[value] = []
seen.append(value)
grouped[value].append((ifirst, ends[i]+1))
for k in grouped:
grouped[k] = np.asarray(grouped[k])
return grouped

```