Coverage for src/ipyvizzu/data/converters/numpy/converter.py: 100%
70 statements
« prev ^ index » next coverage.py v7.4.3, created at 2024-02-26 10:12 +0000
« prev ^ index » next coverage.py v7.4.3, created at 2024-02-26 10:12 +0000
1"""
2This module provides the `NumpyArrayConverter` class,
3which allows converting a `numpy` `array`
4into a list of dictionaries representing series.
5"""
7from types import ModuleType
8from typing import Dict, List, Optional, Tuple, Union
10from ipyvizzu.data.converters.defaults import NAN_DIMENSION, NAN_MEASURE
11from ipyvizzu.data.converters.converter import ToSeriesListConverter
12from ipyvizzu.data.converters.numpy.type_alias import (
13 ColumnConfig,
14 ColumnDtype,
15 ColumnName,
16 ColumnUnit,
17 DType,
18 Index,
19 Name,
20 Unit,
21)
22from ipyvizzu.data.infer_type import InferType
23from ipyvizzu.data.type_alias import (
24 DimensionValue,
25 MeasureValue,
26 Series,
27 SeriesValues,
28)
class NumpyArrayConverter(ToSeriesListConverter):
    """
    Converts a `numpy` `array` into a list of dictionaries representing series.
    Each dictionary contains information about the series `name`, `values` and `type`.

    Parameters:
        np_array: The `numpy` `array` to convert.
        column_name:
            The name of a column. By default, uses column indices. Can be set with an
            Index:Name pair or, for single-dimensional arrays, with just the Name.
        column_dtype:
            The dtype of a column. By default, uses the np_array's dtype. Can be set
            with an Index:DType pair or, for single-dimensional arrays, with just the DType.
        column_unit:
            The unit of a column. By default, no unit is set. Can be set with an
            Index:Unit pair or, for single-dimensional arrays, with just the Unit.
        default_measure_value:
            Default value to use for missing measure values. Defaults to 0.
        default_dimension_value:
            Default value to use for missing dimension values. Defaults to an empty string.

    Example:
        Get series list from `numpy` `array`:

            converter = NumpyArrayConverter(np_array)
            series_list = converter.get_series_list()
    """

    # pylint: disable=too-few-public-methods

    def __init__(
        self,
        np_array: "numpy.array",  # type: ignore
        column_name: Optional[ColumnName] = None,
        column_dtype: Optional[ColumnDtype] = None,
        column_unit: Optional[ColumnUnit] = None,
        default_measure_value: MeasureValue = NAN_MEASURE,
        default_dimension_value: DimensionValue = NAN_DIMENSION,
    ) -> None:
        # pylint: disable=too-many-arguments

        super().__init__(default_measure_value, default_dimension_value)
        self._np = self._get_numpy()
        # The array must be stored before the column configs are normalized,
        # because `_get_columns_config` inspects `self._np_array.ndim`.
        self._np_array = np_array
        self._column_name: Dict[Index, Name] = self._get_columns_config(column_name)
        self._column_dtype: Dict[Index, DType] = self._get_columns_config(column_dtype)
        self._column_unit: Dict[Index, Unit] = self._get_columns_config(column_unit)

    def get_series_list(self) -> List[Series]:
        """
        Convert the `numpy` `array` to a list of dictionaries representing series.

        Returns:
            A list of dictionaries representing series,
            where each dictionary has `name`, `values` and `type` keys.
            A zero-dimensional array yields an empty list.

        Raises:
            ValueError: If the array has more than two dimensions.
        """

        if self._np_array.ndim == 0:
            return []
        if self._np_array.ndim == 1:
            return self._get_series_list_from_array1dim()
        if self._np_array.ndim == 2:
            return self._get_series_list_from_array2dim()
        raise ValueError("arrays larger than 2D are not supported")

    def _get_series_list_from_array1dim(self) -> List[Series]:
        # A 1D array is treated as a single column with index 0.
        i = 0
        name = self._column_name.get(i, i)
        unit = self._column_unit.get(i, None)
        values, infer_type = self._convert_to_series_values_and_type(
            (i, self._np_array)
        )
        return [self._convert_to_series(name, values, infer_type, unit)]

    def _get_series_list_from_array2dim(self) -> List[Series]:
        # Every column of the 2D array becomes one series; the column index
        # is used as the name when no explicit name was configured.
        series_list = []
        for i in range(self._np_array.shape[1]):
            name = self._column_name.get(i, i)
            unit = self._column_unit.get(i, None)
            values, infer_type = self._convert_to_series_values_and_type(
                (i, self._np_array[:, i])
            )
            series_list.append(self._convert_to_series(name, values, infer_type, unit))
        return series_list

    def _get_numpy(self) -> ModuleType:
        # numpy is imported lazily so that it is only required when this
        # converter is actually instantiated.
        try:
            import numpy as np  # pylint: disable=import-outside-toplevel

            return np
        except ImportError as error:
            raise ImportError(
                "numpy is not available. Please install numpy to use this feature."
            ) from error

    def _get_columns_config(
        self,
        config: Optional[Union[ColumnConfig, Dict[Index, ColumnConfig]]],
    ) -> Dict[Index, ColumnConfig]:
        # Normalize a column setting to an {index: value} mapping.
        # A bare (non dict) value is shorthand for column 0 of a 1D array.
        if config is None:
            return {}
        if not isinstance(config, dict):
            if self._np_array.ndim != 1:
                raise ValueError("non dict value can only be used for a 1D array")
            return {0: config}
        return config

    def _convert_to_series_values_and_type(
        self, obj: Tuple[int, "numpy.array"]  # type: ignore
    ) -> Tuple[SeriesValues, InferType]:
        # `obj` is an (index, column values) pair.
        i, array = obj
        # An explicitly configured dtype wins over the dtype of the whole array.
        dtype = self._column_dtype.get(i, self._np_array.dtype)
        if self._np.issubdtype(dtype, self._np.number):
            return self._convert_to_measure_values(array), InferType.MEASURE
        return self._convert_to_dimension_values(array), InferType.DIMENSION

    def _convert_to_measure_values(
        self, obj: "numpy.array"  # type: ignore
    ) -> List[MeasureValue]:
        # Cast to float so that NaN can be detected and replaced.
        # NOTE: per the numpy documentation, `nan_to_num` also maps +/-inf
        # to very large finite numbers.
        array_float = obj.astype(float)
        return self._np.nan_to_num(
            array_float, nan=self._default_measure_value
        ).tolist()

    def _convert_to_dimension_values(
        self, obj: "numpy.array"  # type: ignore
    ) -> List[DimensionValue]:
        # Missing values become the literal string "nan" after the str cast.
        array_str = obj.astype(str)
        mask = array_str == "nan"
        # Build the result with `where` instead of assigning into `array_str`:
        # `array_str` has a fixed-width unicode dtype, so an in-place assignment
        # would silently truncate a replacement longer than the longest
        # existing entry (e.g. "missing" -> "mis" in a "<U3" array).
        # `str()` keeps every element a string, as the in-place assignment did.
        replaced = self._np.where(
            mask, str(self._default_dimension_value), array_str
        )
        return replaced.tolist()