Coverage for src/ipyvizzu/data/converters/numpy/converter.py: 100%

70 statements  

« prev     ^ index     » next       coverage.py v7.4.3, created at 2024-02-26 10:12 +0000

1""" 

2This module provides the `NumpyArrayConverter` class, 

3which allows converting a `numpy` `array` 

4into a list of dictionaries representing series. 

5""" 

6 

7from types import ModuleType 

8from typing import Dict, List, Optional, Tuple, Union 

9 

10from ipyvizzu.data.converters.defaults import NAN_DIMENSION, NAN_MEASURE 

11from ipyvizzu.data.converters.converter import ToSeriesListConverter 

12from ipyvizzu.data.converters.numpy.type_alias import ( 

13 ColumnConfig, 

14 ColumnDtype, 

15 ColumnName, 

16 ColumnUnit, 

17 DType, 

18 Index, 

19 Name, 

20 Unit, 

21) 

22from ipyvizzu.data.infer_type import InferType 

23from ipyvizzu.data.type_alias import ( 

24 DimensionValue, 

25 MeasureValue, 

26 Series, 

27 SeriesValues, 

28) 

29 

30 

class NumpyArrayConverter(ToSeriesListConverter):
    """
    Converts a `numpy` `array` into a list of dictionaries representing series.
    Each dictionary contains information about the series `name`, `values` and `type`.

    Parameters:
        np_array: The `numpy` `array` to convert.
        column_name:
            The name of a column. By default, uses column indices. Can be set with an
            Index:Name pair or, for single-dimensional arrays, with just the Name.
        column_dtype:
            The dtype of a column. By default, uses the np_array's dtype. Can be set
            with an Index:DType pair or, for single-dimensional arrays, with just the DType.
        column_unit:
            The unit of a column. By default, no unit is set. Can be set with an
            Index:Unit pair or, for single-dimensional arrays, with just the Unit.
        default_measure_value:
            Default value to use for missing measure values. Defaults to 0.
        default_dimension_value:
            Default value to use for missing dimension values. Defaults to an empty string.

    Example:
        Get series list from `numpy` `array`:

            converter = NumpyArrayConverter(np_array)
            series_list = converter.get_series_list()
    """

    # pylint: disable=too-few-public-methods

    def __init__(
        self,
        np_array: "numpy.array",  # type: ignore
        column_name: Optional[ColumnName] = None,
        column_dtype: Optional[ColumnDtype] = None,
        column_unit: Optional[ColumnUnit] = None,
        default_measure_value: MeasureValue = NAN_MEASURE,
        default_dimension_value: DimensionValue = NAN_DIMENSION,
    ) -> None:
        # pylint: disable=too-many-arguments

        super().__init__(default_measure_value, default_dimension_value)
        self._np = self._get_numpy()
        # The array must be stored before the column configs are normalized,
        # because `_get_columns_config` inspects `self._np_array.ndim`.
        self._np_array = np_array
        self._column_name: Dict[Index, Name] = self._get_columns_config(column_name)
        self._column_dtype: Dict[Index, DType] = self._get_columns_config(column_dtype)
        self._column_unit: Dict[Index, Unit] = self._get_columns_config(column_unit)

    def get_series_list(self) -> List[Series]:
        """
        Convert the `numpy` `array` to a list of dictionaries representing series.

        Returns:
            A list of dictionaries representing series,
            where each dictionary has `name`, `values` and `type` keys.
            A zero-dimensional array yields an empty list.

        Raises:
            ValueError: If the array has more than two dimensions.
        """

        ndim = self._np_array.ndim
        if ndim == 0:
            return []
        if ndim == 1:
            return self._get_series_list_from_array1dim()
        if ndim == 2:
            return self._get_series_list_from_array2dim()
        raise ValueError("arrays larger than 2D are not supported")

    def _get_series(self, index: int, array: "numpy.array") -> Series:  # type: ignore
        """
        Build a single series from one column.

        Parameters:
            index: The column index, used to look up the configured name, unit and dtype.
            array: The values of the column.

        Returns:
            The series created by `_convert_to_series`.
        """

        # Fall back to the column index itself when no name was configured.
        name = self._column_name.get(index, index)
        unit = self._column_unit.get(index, None)
        values, infer_type = self._convert_to_series_values_and_type((index, array))
        return self._convert_to_series(name, values, infer_type, unit)

    def _get_series_list_from_array1dim(self) -> List[Series]:
        """Return a one-element series list; the whole 1D array is column `0`."""

        return [self._get_series(0, self._np_array)]

    def _get_series_list_from_array2dim(self) -> List[Series]:
        """Return one series per column (second axis) of the 2D array."""

        return [
            self._get_series(i, self._np_array[:, i])
            for i in range(self._np_array.shape[1])
        ]

    def _get_numpy(self) -> ModuleType:
        """
        Import `numpy` lazily and return the module.

        Raises:
            ImportError: If `numpy` cannot be imported.
        """

        try:
            import numpy as np  # pylint: disable=import-outside-toplevel

            return np
        except ImportError as error:
            raise ImportError(
                "numpy is not available. Please install numpy to use this feature."
            ) from error

    def _get_columns_config(
        self,
        config: Optional[Union[ColumnConfig, Dict[Index, ColumnConfig]]],
    ) -> Dict[Index, ColumnConfig]:
        """
        Normalize a column configuration into an Index:value dictionary.

        Parameters:
            config: `None`, a dictionary keyed by column index, or a single value.

        Returns:
            An empty dictionary for `None`, the dictionary itself when one is given,
            or `{0: config}` when a single value is given for a 1D array.

        Raises:
            ValueError: If a single (non dict) value is given for an array
                that is not one-dimensional.
        """

        if config is None:
            return {}
        if isinstance(config, dict):
            return config
        if self._np_array.ndim != 1:
            raise ValueError("non dict value can only be used for a 1D array")
        return {0: config}

    def _convert_to_series_values_and_type(
        self, obj: Tuple[int, "numpy.array"]  # type: ignore
    ) -> Tuple[SeriesValues, InferType]:
        """
        Convert an (index, column values) pair to series values and their type.

        The column is a measure when its dtype (the configured one, or the
        array's own dtype as fallback) is a numeric dtype; otherwise a dimension.
        """

        index, array = obj[0], obj[1]
        dtype = self._column_dtype.get(index, self._np_array.dtype)
        if self._np.issubdtype(dtype, self._np.number):
            return self._convert_to_measure_values(array), InferType.MEASURE
        return self._convert_to_dimension_values(array), InferType.DIMENSION

    def _convert_to_measure_values(
        self, obj: "numpy.array"  # type: ignore
    ) -> List[MeasureValue]:
        """
        Convert the column to a list of floats, replacing NaN with the default
        measure value.
        """

        array_float = obj.astype(float)
        # NOTE: `numpy.nan_to_num` also maps +/-inf to very large finite numbers.
        return self._np.nan_to_num(
            array_float, nan=self._default_measure_value
        ).tolist()

    def _convert_to_dimension_values(
        self, obj: "numpy.array"  # type: ignore
    ) -> List[DimensionValue]:
        """
        Convert the column to a list of strings, replacing the elements whose
        string form is `"nan"` with the default dimension value.
        """

        array_str = obj.astype(str)
        mask = array_str == "nan"
        # Build a new array instead of assigning in place: a numpy unicode array
        # has a fixed width, so `array_str[mask] = value` would silently truncate
        # a default that is longer than the widest existing element.
        # `numpy.where` returns an array wide enough for both operands.
        replacement = str(self._default_dimension_value)
        return self._np.where(mask, replacement, array_str).tolist()

162 array_str[mask] = self._default_dimension_value 

163 return array_str.tolist()