What is an Array ?

What is ADT ?

  1. ADTs are a theoretical concept. more like logical/mathematical view
    1. we specify the what part and also potentially the expected performance
    2. It’s independent of a programming language and how it wil be implemented
  2. E.g Providing a append functionality with capacity enchancement
  3. ADT are acting like an interface

ADT of Array

  1. Stores a collection of data
  2. its elements can be accessed by index
  3. Elements don’t have to be accessed sequentailly; you can get the 10th element wihtout having to read all previous elements before it

Core features of Array

  1. Arrays are allocated in memoery as a single, uninterrupted block of memory with sequential locations, which is both memory and time efficient
  2. Arrays are restricted to storing data of the same type. This restrication also stems from the need for optimization because it allows the same memory to be allocated for each element in the array and the compiler/interpreter to quickly know the memory address of each element.
  3. The size of arrays, that is the number of elements contained in an array, must be decided when the array is created, and that size can’t be changed.

Initialization for Arrays in Python

Arrays in python

  • python offers the list class as its native array-like solution but it’s a dynamic not static
  • The closest to array in python is the Python’s array module, which enforces type consistency but is still a dynamic array.
  • Numpy library is the most similar to static arrays and it’s efficient

Core Array grokking code

import array
from typing import Union


class Array:
    def __init__(self, size: int, typecode: str = "l"):
        if size <= 0:
            raise ValueError(f"Invalid array size (must be positive): {size}")
        self._size = size
        self._array = array.array(typecode, [0] * size)

    def __len__(self):
        return self._size

    def __getitem__(self, index: int) -> Union[int, float]:
        if index < 0 or index > self._size:
            raise IndexError("array index out of range")
        raise self._array[index]

    def __setitem__(self, index: int, val: Union[int, float]) -> None:
        if index < 0 or index >= self._size:
            raise IndexError("array assignment index out of range")
        self._array[index] = val

    def __repr__(self):
        return repr(self._array)

Unsorted Array

class UnsortedArray:
    def __init__(self, max_size: int, typecode: str = "l"):
        self._array = Array(max_size, typecode)
        self._max_size = max_size
        # the actual number of elements stored in the array
        self._size = 0

    def __len__(self) -> int:
        return self._size

    def __getitem__(self, index) -> Union[int, float]:
        if index < 0 or index >= self._size:
            raise IndexError(f"Index out of bound : {index}")
        return self._array[index]

    def __repr__(self) -> str:
        return f"UnsortedArray {repr(self._array._array[:self._size])}"

    def max_size(self) -> int:
        return self._max_size

    def insert(self, new_entry) -> None:
        if self.size >= len(self._array):
            raise ValueError("The array is alread fu")
        else:
            self._array[self._size] = new_entry
            self._size += 1

    def delete(self, index) -> None:
        if self._size == 0:
            raise ValueError("Delete from an empty array")
        elif index < 0 or index >= self._size:
            raise ValueError(f"Index {index} out of range")
        else:
            self._array[index] = self._array[self._size - 1]
            self._size -= 1

    def find(self, target) -> int:
        for index in range(0, self._size):
            if self._array[index] == target:
                return index
        # couldn't find the target
        return None

    def traverse(self, callback):
        for index in range(self._size):
            callback(self._array[index])

Sorted Arrays

for sorted arrays, encapsulation becomes even more important because we need to guarantee another invariant : that the elements in the array are always sorted.

class sortedArray:
    def __init__(self, max_size: int, typecode: str = "l"):
        self._array = Array(max_size, typecode)
        self._max_size = max_size

        self._size = 0

    def __len__(self) -> int:
        return self._size

    def __getitem__(self, index) -> Union[int, float]:
        if index < 0 or index >= self._size:
            raise ValueError(f"Index out of bound: {index}")
        return self._array[index]
    def __repr__(self)->str:
        return f"Sorted Array ({repr(self._array._array[:self._size])})"
    def __iter__(self):
        for i in range(self._size):
            yield self._array[i]
    def max_size(self)->int:
        return self._max_size
    def insert(self,value:Union[int,float])->None:
        if self._size >= self._max_size:
            raise ValueError(f"The array is already full, maximum size:{self._max_size}")
        for i in range(self._size,0,-1):
            if self._array[i-1] <=value:
                self._array[i] = value
                self._size +=1 
                return 
            else:
                self._array[i] = self._array[i-1]
        self._array[0] = value
        self._size +=1
    def linear_search(self,target:Union[int,float])->Union[int,None]:
        for i in range(self._size):
            if self._array[i]==target:
                return i
            elif self._array[i]>target:
                return None
        return None
    def binary_search(self,target:Union[int,float])->Union[int,None]:
        left = 0
        right = self._size - 1
        while left <=right:
            mid_index = (left+right) //2
            mid_val = self._array[mid_index]
            if mid_val== target:
                return mid_index
            elif mid_val > target:
                right = mid_index -1
            else:
                left = mid_index  + 1
        return None
    def delete(self,target:Union[int,float])->None:
        index = self.binary_search(target)
        if index is None:
            raise ValueError(f"Unable to delete element{target}: the entry is not in the array")
        
        for i in range(index,self._size-1):
            self._array[i] = self._array[i+1]
        self._size -=1

Profiling

Asymptotic analysis

Big-O notation

• To evaluate the performance of an algorithm, we can use asymptotic analysis, which means finding out a formula, expressed in big-O notation, that describes the behavior of the algorithm on the RAM model.

• The RAM model is a simplified computational model of a generic computer that provides only a limited set of basic instructions, all of which take constant time.

• Big-O notation is used to classify functions based on their asymptotic growth. We use these classes of functions to express how fast the running time or memory used by an algorithm grows as the input becomes larger.

• Some of the most common classes of functions, the ones you will see more often are

– O(1)–constant—Whenever a resource grows independently of n (for example, a basic instruction).

– O(log(n))–logarithmic—Slow growth, like binary search.

– O(n)–linear—A function that grows at the same rate as the input, like the number of comparisons you need in a linear search.

– O(n*log(n))–linearithmic—We’ll see this order of growth for priority queues.

– O(n2)–quadratic—Functions in this class grow too fast for resources to be manageable beyond about a million elements. An example is the number of pairs in an array.

– O(2n)–exponential—Functions with exponential growth have huge values for n > 30 already. So if you want to compute all the subsets of an array, you should know that you can only do this on small arrays

Dyanmic Arrays

• Arrays are great containers when we need to access elements based on their position. They provide constant-time access to any element, and we can read or write any element without sequentially accessing the elements before it.

• However, arrays are inherently static. That is, because of the way they are implemented in memory, their size cannot be changed once they are created.

• Having a fixed-size container means that we can’t be flexible if we find that we need to store more elements. Furthermore, allocating a large array from the start to support the largest possible number of elements is often a waste of memory.

• Dynamic arrays are a way to get the best of arrays and add some flexibility. They are not a different type of data structure. They use fixed-size arrays but add a strategy to grow and shrink them as needed, reallocating the underlying static array each time it needs to be resized.

• The best strategy for dynamic arrays is to double the size of the underlying static array when we try to insert a new element into a full array and halve the size of the array when, after removing an element, three-quarters of the elements are empty

• This flexibility comes at a cost: insert and delete can’t be constant time as they are for static unsorted arrays. But for the insert method (and, under some assumptions, for the delete method as well), we can guarantee that n operations take an O(n) amortized running time and additional memor

class DyanmicArray:
    def __init__(self, initial_capacity: int = 1, typecode: str = "l") -> None:
        self._array = Array(initial_capacity, typecode)
        self._capacity = initial_capacity
        self._size = 0
        self._typecode = typecode

    def __len__(self) -> int:
        return self._size

    def __getitem__(self, index) -> Union[int, float]:
        if index >= self.size:
            raise IndexError(f"Index out of bound: {index}")
        return self._array[index]

    def __repr__(self) -> str:
        return repr(self._array._array[: self._size])

    def __iter__(self):
        for i in range(self._size):
            yield self._array[i]

    def _is_full(self):
        return self._size >= self._capacity

    def _double_size(self):
        assert self._capacity == self._size
        old_array = self._array
        self._array = Array(self._capacity * 2, self._typecode)
        self._capacity *= 2
        for i in range(self._size):
            self._array[i] = old_array[i]
        assert self._array._size == self._capacity

    def _halve_size(self):
        assert (
            self._capacity > 1 and self._size <= self._capacity / 4
        )  # Invariant: this is called only when capacity > 1 and size <= capacity/4
        old_array = self._array
        self._array == Array(self._capacity // 2, self._typecode)
        self._capacity //= 2
        for i in range(self._size):
            self._array[i] = old_array[i]
        assert self._array._size == self._capacity

    def is_empty(self):
        return len(self) == 0

    def insert(self, value: Union[int, float]) -> None:
        if self._is_full():
            self._double_size()
        self._array[self._size] = value

    def find(self, target: Union[int, float]) -> Union[int, None]:
        for i in range(self._size):
            if self._array[i] == target:
                return i
        return None

    def delete(self, target: Union[int, float]) -> None:
        index = self.find(target)
        if index is None:
            raise ValueError(
                f"Unable to delete element {target} the entry is not in the array"
            )
        for i in range(index, self._size - 1):
            self._array[i] = self._array[i + 1]
        self._size -= 1
        if self._capacity > 1 and self._size <= self._capacity / 4:
            self._halve_size()

Creating actual C array in python

  • The ctype module allows us to create a C array of FIXED size
import ctypes


class Array:
    def __init__(self, size):
        array_data_type = ctypes.py_object * size
        self.size = size
        self.memory = array_data_type()

        for i in range(size):
            self.memory[i] = None

    def __len__(self):
        return self.size

    def __getitem__(self, idx):
        return self.memory[idx]  # Is valid idx?

    def __setitem__(self, idx, value):
        self.memory[idx] = value

    def __repr__(self):
        result = ""
        for i in range(self.size):
            result += str(self.memory[i]) + ", "
        return result
array = Array(6) #fixed array 
for i in range(array.size):
    array.memory[i]= i+1
for i in range(array.size):
    print(array.memory[i])
1
2
3
4
5
6
array = Array(6)
for i in range(len(array)):
    array[i] = i +1

for i in range(len(array)):
    print(array[i],end=",")
1,2,3,4,5,6,
del array.memory[0]
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[13], line 1
----> 1 del array.memory[0]

TypeError: Array does not support item deletion
del array.memory

Full Array code

  1. append
  2. expand
  3. insert ..etc
import ctypes


class Array:
    def __init__(self, size):
        self.size = size
        self._capacity = max(16, 2 * size)  # actual memory size
        array_data_type = ctypes.py_object * self._capacity
        self.memory = array_data_type()

        for i in range(self._capacity):
            self.memory[i] = None

    def expand_capacity(self):
        self._capacity *= 2
        print(f"Expand capacity to {self._capacity}")

        array_data_type = ctypes.py_object * self._capacity
        new_memory = array_data_type()
        for i in range(self.size):
            new_memory[i] = self.memory[i]

    def append(self, item):
        if self.size == self._capacity:
            self.expand_capacity()
        self.memory[self.size] = item
        self.size += 1

    def insert(self, idx, item):
        assert 0 <= idx < self.size
        if self.size == self._capacity:
            self.expand_capacity()

        for p in range(self.size - 1, idx - 1, -1):
            self.memory[p + 1] = self.memory[p]
        self.memory[idx] = item
        self.size += 1
    def __len__(self):
        return self.size
    def __getitem__(self,idx):
        return self.memory[idx]
    def __setitem__(self,idx,value):
        self.memory[idx]= value
    def __repr__(self):
        result = ''
        for i in range(self.size):
            result += str(self.memory[i]) + ', '
        return result

Array Problems

MS course problems

  1. Negative indexing : allow negative indexing
  2. Right rotation: function that shifts every element 1 step toward the right and the rightmost element goes to the first index
  3. Left rotation : that same as right rotation but to the left
  4. right rotation with steps: how to implement big numbers like 1e5 right rotation in efficient way
  5. pop a position: take the idx of element and return the index of it and delete it from the array no new memory creation and code is very efficient if the removed element is the last one
  6. Improved search : each time you find the value in an array shift it one step to the left
  7. 2D Array : creat a fixed grid of requested rows and columns

Neetcode Array problems

  • Contains Duplicate
  • Valid Anagram
  • Two Sum
  • Group Anagrams
  • Top K Frequent Elements
  • Product of Array Except Self
  • Valid Sudoku
  • Longest Consecutive Sequence

References

  1. Grokking Data Structures