Coverage for src/ipyvizzu/data/converters/numpy/converter.py: 100%
67 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-12 08:13 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-12 08:13 +0000
1"""
2This module provides the `NumpyArrayConverter` class,
3which allows converting a `numpy` `array`
4into a list of dictionaries representing series.
5"""
7from types import ModuleType
8from typing import Dict, List, Optional, Tuple, Union
10from ipyvizzu.data.converters.defaults import NAN_DIMENSION, NAN_MEASURE
11from ipyvizzu.data.converters.converter import ToSeriesListConverter
12from ipyvizzu.data.converters.numpy.type_alias import (
13 ColumnConfig,
14 ColumnDtype,
15 ColumnName,
16 DType,
17 Index,
18 Name,
19)
20from ipyvizzu.data.infer_type import InferType
21from ipyvizzu.data.type_alias import (
22 DimensionValue,
23 MeasureValue,
24 Series,
25 SeriesValues,
26)
class NumpyArrayConverter(ToSeriesListConverter):
    """
    Converts a `numpy` `array` into a list of dictionaries representing series.
    Each dictionary contains information about the series `name`, `values` and `type`.

    Parameters:
        np_array: The `numpy` `array` to convert.
        column_name:
            The name of a column. By default, uses column indices. Can be set with an
            Index:Name pair or, for single-dimensional arrays, with just the Name.
        column_dtype:
            The dtype of a column. By default, uses the np_array's dtype. Can be set
            with an Index:DType pair or, for single-dimensional arrays, with just the DType.
        default_measure_value:
            Default value to use for missing measure values. Defaults to 0.
        default_dimension_value:
            Default value to use for missing dimension values. Defaults to an empty string.

    Example:
        Get series list from `numpy` `array`:

            converter = NumpyArrayConverter(np_array)
            series_list = converter.get_series_list()
    """

    # pylint: disable=too-few-public-methods

    def __init__(
        self,
        np_array: "numpy.array",  # type: ignore
        column_name: Optional[ColumnName] = None,
        column_dtype: Optional[ColumnDtype] = None,
        default_measure_value: MeasureValue = NAN_MEASURE,
        default_dimension_value: DimensionValue = NAN_DIMENSION,
    ) -> None:
        # pylint: disable=too-many-arguments

        super().__init__(default_measure_value, default_dimension_value)
        self._np = self._get_numpy()
        # The array must be stored before the column configs are resolved:
        # _get_columns_config reads its ndim to validate non-dict values.
        self._np_array = np_array
        self._column_name: Dict[Index, Name] = self._get_columns_config(column_name)
        self._column_dtype: Dict[Index, DType] = self._get_columns_config(column_dtype)

    def get_series_list(self) -> List[Series]:
        """
        Convert the `numpy` `array` to a list of dictionaries representing series.

        Returns:
            A list of dictionaries representing series,
            where each dictionary has `name`, `values` and `type` keys.

        Raises:
            ValueError: If the array has more than two dimensions.
        """

        if self._np_array.ndim == 0:
            return []
        if self._np_array.ndim == 1:
            return self._get_series_list_from_array1dim()
        if self._np_array.ndim == 2:
            return self._get_series_list_from_array2dim()
        raise ValueError("arrays larger than 2D are not supported")

    def _get_series_list_from_array1dim(self) -> List[Series]:
        """Build the single series of a 1D array, stored under column index 0."""

        i = 0
        name = self._column_name.get(i, i)
        values, infer_type = self._convert_to_series_values_and_type(
            (i, self._np_array)
        )
        return [self._convert_to_series(name, values, infer_type)]

    def _get_series_list_from_array2dim(self) -> List[Series]:
        """Build one series per column (second axis) of a 2D array."""

        series_list = []
        for i in range(self._np_array.shape[1]):
            # Fall back to the column index when no name was configured.
            name = self._column_name.get(i, i)
            values, infer_type = self._convert_to_series_values_and_type(
                (i, self._np_array[:, i])
            )
            series_list.append(self._convert_to_series(name, values, infer_type))
        return series_list

    def _get_numpy(self) -> ModuleType:
        """
        Import `numpy` lazily and return the module.

        Raises:
            ImportError: If `numpy` is not installed.
        """

        try:
            import numpy as np  # pylint: disable=import-outside-toplevel

            return np
        except ImportError as error:
            raise ImportError(
                "numpy is not available. Please install numpy to use this feature."
            ) from error

    def _get_columns_config(
        self,
        config: Optional[Union[ColumnConfig, Dict[Index, ColumnConfig]]],
    ) -> Dict[Index, ColumnConfig]:
        """
        Normalize a column configuration into an index-keyed dictionary.

        `None` becomes an empty dictionary, a dictionary is returned as is,
        and any other value is assigned to column index 0.

        Raises:
            ValueError: If a non-dict value is given for an array that is not 1D.
        """

        if config is None:
            return {}
        if not isinstance(config, dict):
            if self._np_array.ndim != 1:
                raise ValueError("non dict value can only be used for a 1D array")
            return {0: config}
        return config

    def _convert_to_series_values_and_type(
        self, obj: Tuple[int, "numpy.array"]  # type: ignore
    ) -> Tuple[SeriesValues, InferType]:
        """
        Convert an `(index, column)` pair into series values and their type.

        The dtype configured for the column index is used when present,
        otherwise the dtype of the whole array. Numeric dtypes produce
        measure values; everything else produces dimension values.
        """

        i, array = obj
        dtype = self._column_dtype.get(i, self._np_array.dtype)
        if self._np.issubdtype(dtype, self._np.number):
            return self._convert_to_measure_values(array), InferType.MEASURE
        return self._convert_to_dimension_values(array), InferType.DIMENSION

    def _convert_to_measure_values(
        self, obj: "numpy.array"  # type: ignore
    ) -> List[MeasureValue]:
        """
        Convert a column to a list of floats, replacing NaN with the default
        measure value.

        Only the `nan` replacement of `nan_to_num` is overridden, so its
        default handling of infinities still applies.
        """

        array_float = obj.astype(float)
        return self._np.nan_to_num(
            array_float, nan=self._default_measure_value
        ).tolist()

    def _convert_to_dimension_values(
        self, obj: "numpy.array"  # type: ignore
    ) -> List[DimensionValue]:
        """
        Convert a column to a list of strings, replacing missing values with
        the default dimension value.

        Missing values are detected on the string form, so any element whose
        string representation is exactly "nan" is replaced.
        """

        array_str = obj.astype(str)
        mask = array_str == "nan"
        # astype(str) yields a fixed-width unicode dtype sized to the widest
        # element; assigning a longer replacement into it would be silently
        # truncated. Do the substitution on an object-dtype copy instead.
        values = array_str.astype(object)
        # str() keeps the string coercion that assigning into a str array did.
        values[mask] = str(self._default_dimension_value)
        return values.tolist()