Coverage for src/ipyvizzu/data/converters/numpy/converter.py: 100%

67 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-10-12 08:13 +0000

1""" 

2This module provides the `NumpyArrayConverter` class, 

3which allows converting a `numpy` `array` 

4into a list of dictionaries representing series. 

5""" 

6 

7from types import ModuleType 

8from typing import Dict, List, Optional, Tuple, Union 

9 

10from ipyvizzu.data.converters.defaults import NAN_DIMENSION, NAN_MEASURE 

11from ipyvizzu.data.converters.converter import ToSeriesListConverter 

12from ipyvizzu.data.converters.numpy.type_alias import ( 

13 ColumnConfig, 

14 ColumnDtype, 

15 ColumnName, 

16 DType, 

17 Index, 

18 Name, 

19) 

20from ipyvizzu.data.infer_type import InferType 

21from ipyvizzu.data.type_alias import ( 

22 DimensionValue, 

23 MeasureValue, 

24 Series, 

25 SeriesValues, 

26) 

27 

28 

class NumpyArrayConverter(ToSeriesListConverter):
    """
    Converts a `numpy` `array` into a list of dictionaries representing series.
    Each dictionary contains information about the series `name`, `values` and `type`.

    Parameters:
        np_array: The `numpy` `array` to convert.
        column_name:
            The name of a column. By default, uses column indices. Can be set with an
            Index:Name pair or, for single-dimensional arrays, with just the Name.
        column_dtype:
            The dtype of a column. By default, uses the np_array's dtype. Can be set
            with an Index:DType pair or, for single-dimensional arrays, with just the DType.
        default_measure_value:
            Default value to use for missing measure values. Defaults to 0.
        default_dimension_value:
            Default value to use for missing dimension values. Defaults to an empty string.

    Raises:
        ImportError: If `numpy` cannot be imported.
        ValueError: If `column_name` or `column_dtype` is not a dictionary
            and `np_array` is not single-dimensional.

    Example:
        Get series list from `numpy` `array`:

            converter = NumpyArrayConverter(np_array)
            series_list = converter.get_series_list()
    """

    # pylint: disable=too-few-public-methods

    def __init__(
        self,
        np_array: "numpy.array",  # type: ignore
        column_name: Optional[ColumnName] = None,
        column_dtype: Optional[ColumnDtype] = None,
        default_measure_value: MeasureValue = NAN_MEASURE,
        default_dimension_value: DimensionValue = NAN_DIMENSION,
    ) -> None:
        # pylint: disable=too-many-arguments

        super().__init__(default_measure_value, default_dimension_value)
        self._np = self._get_numpy()
        # The array must be stored before the column configs are normalized,
        # because `_get_columns_config` inspects `self._np_array.ndim`.
        self._np_array = np_array
        self._column_name: Dict[Index, Name] = self._get_columns_config(column_name)
        self._column_dtype: Dict[Index, DType] = self._get_columns_config(column_dtype)

    def get_series_list(self) -> List[Series]:
        """
        Convert the `numpy` `array` to a list of dictionaries representing series.

        Returns:
            A list of dictionaries representing series,
            where each dictionary has `name`, `values` and `type` keys.
            A zero-dimensional array yields an empty list.

        Raises:
            ValueError: If the array has more than two dimensions.
        """

        ndim = self._np_array.ndim
        if ndim == 0:
            return []
        if ndim == 1:
            return self._get_series_list_from_array1dim()
        if ndim == 2:
            return self._get_series_list_from_array2dim()
        raise ValueError("arrays larger than 2D are not supported")

    def _get_series_list_from_array1dim(self) -> List[Series]:
        """Return a one-element series list; the whole array is column `0`."""

        return [self._get_series_from_column(0, self._np_array)]

    def _get_series_list_from_array2dim(self) -> List[Series]:
        """Return one series per column (second axis) of the array."""

        return [
            self._get_series_from_column(index, self._np_array[:, index])
            for index in range(self._np_array.shape[1])
        ]

    def _get_series_from_column(
        self, index: int, array: "numpy.array"  # type: ignore
    ) -> Series:
        """
        Build the series of a single column.

        The series name is the configured column name,
        falling back to the column index itself.
        """

        name = self._column_name.get(index, index)
        values, infer_type = self._convert_to_series_values_and_type((index, array))
        return self._convert_to_series(name, values, infer_type)

    def _get_numpy(self) -> ModuleType:
        """
        Import `numpy` lazily and return the module.

        Raises:
            ImportError: If `numpy` is not installed.
        """

        try:
            import numpy as np  # pylint: disable=import-outside-toplevel

            return np
        except ImportError as error:
            raise ImportError(
                "numpy is not available. Please install numpy to use this feature."
            ) from error

    def _get_columns_config(
        self,
        config: Optional[Union[ColumnConfig, Dict[Index, ColumnConfig]]],
    ) -> Dict[Index, ColumnConfig]:
        """
        Normalize a column config into an Index:value dictionary.

        `None` becomes an empty dictionary, a dictionary is returned as is,
        and any other value is mapped to column `0`.

        Raises:
            ValueError: If a non dictionary value is given
                and the array is not single-dimensional.
        """

        if config is None:
            return {}
        if isinstance(config, dict):
            return config
        if self._np_array.ndim != 1:
            raise ValueError("non dict value can only be used for a 1D array")
        return {0: config}

    def _convert_to_series_values_and_type(
        self, obj: Tuple[int, "numpy.array"]  # type: ignore
    ) -> Tuple[SeriesValues, InferType]:
        """
        Convert an (index, column array) pair into series values and their type.

        The column is a measure if its dtype is a sub-dtype of `numpy.number`,
        otherwise it is a dimension. The dtype configured for the column index
        takes precedence over the dtype of the whole array.
        """

        index, array = obj
        dtype = self._column_dtype.get(index, self._np_array.dtype)
        if self._np.issubdtype(dtype, self._np.number):
            return self._convert_to_measure_values(array), InferType.MEASURE
        return self._convert_to_dimension_values(array), InferType.DIMENSION

    def _convert_to_measure_values(
        self, obj: "numpy.array"  # type: ignore
    ) -> List[MeasureValue]:
        """
        Convert a column to a list of floats,
        replacing NaN with the default measure value.
        """

        # NOTE: besides NaN, `nan_to_num` also replaces infinities
        # (by default with very large finite numbers).
        array_float = obj.astype(float)
        return self._np.nan_to_num(
            array_float, nan=self._default_measure_value
        ).tolist()

    def _convert_to_dimension_values(
        self, obj: "numpy.array"  # type: ignore
    ) -> List[DimensionValue]:
        """
        Convert a column to a list of strings,
        replacing missing values with the default dimension value.

        A value counts as missing if its string form is `"nan"`,
        so literal `"nan"` strings are replaced as well.
        """

        array_str = obj.astype(str)
        mask = array_str == "nan"
        # `astype(str)` yields a fixed-width unicode dtype; assigning a longer
        # default into it would silently truncate the default, so the
        # replacement is done on an object-dtype copy instead.
        values = array_str.astype(object)
        # `str()` keeps the output all-strings, as the fixed-width assignment did.
        values[mask] = str(self._default_dimension_value)
        return values.tolist()