Skip to content

Numpy

ipyvizzu.data.converters.numpy

This package provides the modules that implement the numpy converter.

ipyvizzu.data.converters.numpy.converter

This module provides the NumpyArrayConverter class, which allows converting a numpy array into a list of dictionaries representing series.

NumpyArrayConverter

Bases: ToSeriesListConverter

Converts a numpy array into a list of dictionaries representing series. Each dictionary contains information about the series name, values and type.

Parameters:

Name Type Description Default
np_array array

The numpy array to convert.

required
column_name Optional[ColumnName]

The name of a column. By default, uses column indices. Can be set with an Index:Name pair or, for single-dimensional arrays, with just the Name.

None
column_dtype Optional[ColumnDtype]

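The dtype of a column. By default, uses the np_array's dtype. Can be set with an Index:DType pair or, for single-dimensional arrays, with just the DType.

None
column_unit Optional[ColumnUnit]

The unit of a column. By default, no unit is set. Can be set with an Index:Unit pair or, for single-dimensional arrays, with just the Unit.

None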
default_measure_value MeasureValue

Default value to use for missing measure values. Defaults to 0.

NAN_MEASURE
default_dimension_value DimensionValue

Default value to use for missing dimension values. Defaults to an empty string.

NAN_DIMENSION
Example

Get series list from numpy array:

converter = NumpyArrayConverter(np_array)
series_list = converter.get_series_list()
Source code in src/ipyvizzu/data/converters/numpy/converter.py
class NumpyArrayConverter(ToSeriesListConverter):
    """
    Converts a `numpy` `array` into a list of dictionaries representing series.
    Each dictionary contains information about the series `name`, `values` and `type`.

    Parameters:
        np_array: The `numpy` `array` to convert.
        column_name:
            The name of a column. By default, uses column indices. Can be set with an
            Index:Name pair or, for single-dimensional arrays, with just the Name.
        column_dtype:
            The dtype of a column. By default, uses the np_array's dtype. Can be set
            with an Index:DType pair or, for single-dimensional arrays, with just the DType.
        column_unit:
            The unit of a column. By default, no unit is set. Can be set with an
            Index:Unit pair or, for single-dimensional arrays, with just the Unit.
        default_measure_value:
            Default value to use for missing measure values. Defaults to 0.
        default_dimension_value:
            Default value to use for missing dimension values. Defaults to an empty string.

    Example:
        Get series list from `numpy` `array`:

            converter = NumpyArrayConverter(np_array)
            series_list = converter.get_series_list()
    """

    # pylint: disable=too-few-public-methods

    def __init__(
        self,
        np_array: "numpy.array",  # type: ignore
        column_name: Optional[ColumnName] = None,
        column_dtype: Optional[ColumnDtype] = None,
        column_unit: Optional[ColumnUnit] = None,
        default_measure_value: MeasureValue = NAN_MEASURE,
        default_dimension_value: DimensionValue = NAN_DIMENSION,
    ) -> None:
        # pylint: disable=too-many-arguments

        super().__init__(default_measure_value, default_dimension_value)
        self._np = self._get_numpy()
        # The array must be stored before the configs are normalized:
        # `_get_columns_config` inspects `self._np_array.ndim`.
        self._np_array = np_array
        self._column_name: Dict[Index, Name] = self._get_columns_config(column_name)
        self._column_dtype: Dict[Index, DType] = self._get_columns_config(column_dtype)
        self._column_unit: Dict[Index, Unit] = self._get_columns_config(column_unit)

    def get_series_list(self) -> List[Series]:
        """
        Convert the `numpy` `array` to a list of dictionaries representing series.

        A 0-dimensional array yields an empty list, a 1-dimensional array
        yields a single series and a 2-dimensional array yields one series
        per column.

        Returns:
            A list of dictionaries representing series,
            where each dictionary has `name`, `values` and `type` keys.

        Raises:
            ValueError: If the array has more than two dimensions.
        """

        dimensions = self._np_array.ndim
        if dimensions == 0:
            return []
        builders = {
            1: self._get_series_list_from_array1dim,
            2: self._get_series_list_from_array2dim,
        }
        builder = builders.get(dimensions)
        if builder is None:
            raise ValueError("arrays larger than 2D are not supported")
        return builder()

    def _get_series_list_from_array1dim(self) -> List[Series]:
        """Return a one-element list: the whole array as the series at index 0."""

        return [self._get_series_from_column(0, self._np_array)]

    def _get_series_list_from_array2dim(self) -> List[Series]:
        """Return one series per column (second axis) of the 2D array."""

        return [
            self._get_series_from_column(index, self._np_array[:, index])
            for index in range(self._np_array.shape[1])
        ]

    def _get_series_from_column(
        self, index: int, array: "numpy.array"  # type: ignore
    ) -> Series:
        """Build the series for one column, applying its configured name and unit."""

        # Without a configured name the column index itself is used as the name.
        name = self._column_name.get(index, index)
        unit = self._column_unit.get(index, None)
        values, infer_type = self._convert_to_series_values_and_type((index, array))
        return self._convert_to_series(name, values, infer_type, unit)

    def _get_numpy(self) -> ModuleType:
        """
        Import `numpy` lazily and return the module.

        Raises:
            ImportError: If `numpy` cannot be imported.
        """

        try:
            import numpy as np  # pylint: disable=import-outside-toplevel

            return np
        except ImportError as error:
            raise ImportError(
                "numpy is not available. Please install numpy to use this feature."
            ) from error

    def _get_columns_config(
        self,
        config: Optional[Union[ColumnConfig, Dict[Index, ColumnConfig]]],
    ) -> Dict[Index, ColumnConfig]:
        """
        Normalize a column config into an Index:value dictionary.

        `None` becomes an empty dictionary, a dictionary is returned as is,
        and any other value is mapped to column 0.

        Raises:
            ValueError: If a non-dict value is given and the array is not 1D.
        """

        if config is None:
            return {}
        if isinstance(config, dict):
            return config
        if self._np_array.ndim != 1:
            raise ValueError("non dict value can only be used for a 1D array")
        return {0: config}

    def _convert_to_series_values_and_type(
        self, obj: Tuple[int, "numpy.array"]  # type: ignore
    ) -> Tuple[SeriesValues, InferType]:
        """
        Convert an `(index, column)` pair to its values and inferred type.

        The dtype configured for the index (falling back to the dtype of the
        whole array) decides the type: number subdtypes become measures,
        everything else becomes dimensions.
        """

        index, array = obj
        dtype = self._column_dtype.get(index, self._np_array.dtype)
        if self._np.issubdtype(dtype, self._np.number):
            return self._convert_to_measure_values(array), InferType.MEASURE
        return self._convert_to_dimension_values(array), InferType.DIMENSION

    def _convert_to_measure_values(
        self, obj: "numpy.array"  # type: ignore
    ) -> List[MeasureValue]:
        """
        Cast the column to float and replace NaN with the default measure value.

        NOTE: `nan_to_num` is called with only `nan=` set, so infinities are
        handled by numpy's defaults (replaced with very large finite numbers).
        """

        array_float = obj.astype(float)
        return self._np.nan_to_num(
            array_float, nan=self._default_measure_value
        ).tolist()

    def _convert_to_dimension_values(
        self, obj: "numpy.array"  # type: ignore
    ) -> List[DimensionValue]:
        """
        Cast the column to str and replace missing entries with the default
        dimension value.

        An entry counts as missing when its string form is exactly `"nan"`.
        """

        array_str = obj.astype(str)
        missing = array_str == "nan"
        # `astype(str)` yields a fixed-width unicode array; assigning into it
        # would silently truncate a default value longer than the widest
        # existing item. Switching to object dtype keeps the replacement intact.
        values = array_str.astype(object)
        values[missing] = self._default_dimension_value
        return values.tolist()
get_series_list()

Convert the numpy array to a list of dictionaries representing series.

Returns:

Type Description
List[Series]

A list of dictionaries representing series, where each dictionary has name, values and type keys.

Source code in src/ipyvizzu/data/converters/numpy/converter.py
def get_series_list(self) -> List[Series]:
    """
    Convert the `numpy` `array` to a list of dictionaries representing series.

    A 0-dimensional array yields an empty list; 1- and 2-dimensional arrays
    are delegated to the matching private builder.

    Returns:
        A list of dictionaries representing series,
        where each dictionary has `name`, `values` and `type` keys.

    Raises:
        ValueError: If the array has more than two dimensions.
    """

    dimensions = self._np_array.ndim
    if dimensions == 0:
        return []
    builders = {
        1: self._get_series_list_from_array1dim,
        2: self._get_series_list_from_array2dim,
    }
    builder = builders.get(dimensions)
    if builder is None:
        raise ValueError("arrays larger than 2D are not supported")
    return builder()

ipyvizzu.data.converters.numpy.type_alias

This module provides typing aliases for numpy converter.

Index = int module-attribute

Represents the index of a column.

Name = str module-attribute

Represents the name of a column.

DType = type module-attribute

Represents the dtype of a column.

Unit = str module-attribute

Represents the unit of a column.

ColumnName = Union[Name, Dict[Index, Name]] module-attribute

Represents a column name. It is a dictionary of Index:Name pairs or, for single-dimensional arrays, it can be just a Name.

ColumnDtype = Union[DType, Dict[Index, DType]] module-attribute

Represents a column dtype. It is a dictionary of Index:DType pairs or, for single-dimensional arrays, it can be just a DType.

ColumnUnit = Union[Unit, Dict[Index, Unit]] module-attribute

Represents a column unit. It is a dictionary of Index:Unit pairs or, for single-dimensional arrays, it can be just a Unit.

ColumnConfig = TypeVar('ColumnConfig', Name, DType, Unit) module-attribute

Represents a column config. It can be Name, DType or Unit.