Source code for pyxc.core.processor.reducer

import numpy as np
from numpy.typing import ArrayLike


class Reducer(list):
    """
    A list of reduction steps that collapse a structured array to one record.

    Each element of the list is either

    * a callable, which is applied to every field of the input array, or
    * a two-element ``(callable, columns)`` pair, where ``columns`` is an
      iterable of field names the callable is applied to.

    The steps are applied in list order and each one appends its results to
    the output record.

    Methods
    -------
    reduce(array: np.ndarray) -> np.ndarray
        Apply every step to ``array`` and return the one-record result.

    Notes
    -----
    ``reduce`` always reads the fields ``query_index``, ``x-coordinates``,
    ``y-coordinates``, ``x`` and ``y`` from the input, so the structured
    array must provide them.

    Every step adds one output field per processed column, named
    ``"<column>_<callable.__name__>"``. For example, ``(np.mean, ["x", "y"])``
    adds the fields ``x_mean`` and ``y_mean``. Each added field is stored
    with the dtype of its source column, so a result that does not fit that
    dtype (such as the mean of an integer column) is cast to it.
    """

    def reduce(self, array: np.ndarray) -> np.ndarray:
        """
        Reduce ``array`` to a single-record structured array.

        Parameters
        ----------
        array : np.ndarray
            Structured array containing at least the fields ``query_index``,
            ``x-coordinates``, ``y-coordinates``, ``x`` and ``y``.

        Returns
        -------
        np.ndarray
            Structured array of shape ``(1,)``. Its leading fields are
            ``count`` (``len(array)``), ``query_index``, ``x-coordinates``
            and ``y-coordinates`` (taken from the first record), and
            ``avg_x`` / ``avg_y`` (means of ``x`` and ``y``). They are
            followed by the fields produced by each step, in list order.

        Raises
        ------
        ValueError
            If a step names a column that is not a field of ``array``, or if
            a non-callable step does not have exactly two elements.
        """
        field_names = array.dtype.names

        final_dtype = [
            ("count", int),
            ("query_index", int),
            ("x-coordinates", float),
            ("y-coordinates", float),
            ("avg_x", float),
            ("avg_y", float),
        ]
        final_values = [
            len(array),
            array["query_index"][0],
            array["x-coordinates"][0],
            array["y-coordinates"][0],
            np.mean(array["x"]),
            np.mean(array["y"]),
        ]

        for action in self:
            if callable(action):
                act = action
                cols = field_names
            elif len(action) == 2:
                act, cols = action
                # Materialize once so one-shot iterables (e.g. generators)
                # survive both the validation pass and the reduction pass.
                cols = tuple(cols)
                for col in cols:
                    if col not in field_names:
                        raise ValueError(
                            f"Error: A given column name {col} not in array."
                        )
            else:
                raise ValueError(
                    "Error: Only two elements are allowed for each reducer element."
                )

            suffix = act.__name__
            # Build names and values from the same iteration over ``cols``
            # so every value is stored under the field of the column it was
            # computed from, regardless of the order the caller listed them.
            for col in cols:
                final_dtype.append((f"{col}_{suffix}", array.dtype[col]))
                final_values.append(act(array[col]))

        return np.array([tuple(final_values)], np.dtype(final_dtype))