Converters
            ipyvizzu.data.converters
    This module contains converter classes that offer a user-friendly interface for data conversion,
enabling users to effortlessly transform various data formats into a standardized representation
of series compatible with ipyvizzu.
            ipyvizzu.data.converters.converter
    This module provides the ToSeriesListConverter abstract class.
            ToSeriesListConverter
    
              Bases: ABC
Converts data into a list of dictionaries representing series.
Each dictionary contains information about the series name, values and type.
Source code in src/ipyvizzu/data/converters/converter.py
                class ToSeriesListConverter(ABC):
                    """
                    Abstract base class of the converters that produce series lists.

                    Every series is a dictionary with the `name`, `values` and `type` keys
                    and, when a unit is given, an additional `unit` key.

                    Parameters:
                        default_measure_value: Value kept on the instance for subclasses.
                        default_dimension_value: Value kept on the instance for subclasses.
                    """

                    # pylint: disable=too-few-public-methods

                    def __init__(
                        self,
                        default_measure_value: MeasureValue,
                        default_dimension_value: DimensionValue,
                    ) -> None:
                        # This base class only stores the defaults; it never reads them itself.
                        self._default_dimension_value = default_dimension_value
                        self._default_measure_value = default_measure_value

                    @abstractmethod
                    def get_series_list(self) -> List[Series]:
                        """
                        Convert data to a list of dictionaries representing series.

                        Returns:
                            A list of dictionaries representing series,
                            where each dictionary has `name`, `values` and `type` keys.
                        """

                    @abstractmethod
                    def _convert_to_series_values_and_type(
                        self, obj: Any
                    ) -> Tuple[SeriesValues, InferType]:
                        """
                        Convert object to SeriesValues and InferType.
                        """

                    @abstractmethod
                    def _convert_to_measure_values(self, obj: Any) -> List[MeasureValue]:
                        """
                        Convert object to a list of MeasureValue.
                        """

                    @abstractmethod
                    def _convert_to_dimension_values(self, obj: Any) -> List[DimensionValue]:
                        """
                        Convert object to a list of DimensionValue.
                        """

                    def _convert_to_series(
                        self,
                        name: Union[str, int],
                        values: SeriesValues,
                        infer_type: InferType,
                        unit: Optional[str] = None,
                    ) -> Series:
                        """
                        Assemble a single series dictionary.

                        The name is converted with `str` and the type is read from
                        `infer_type.value`. The `unit` key is only added when `unit`
                        is not None.
                        """
                        unit_entry = {} if unit is None else {"unit": unit}
                        return {
                            "name": str(name),
                            "values": values,
                            "type": infer_type.value,
                            **unit_entry,
                        }
            get_series_list()
  
      abstractmethod
  
    Convert data to a list of dictionaries representing series.
Returns:
| Type | Description | 
|---|---|
| List[Series] | A list of dictionaries representing series, | 
| List[Series] | where each dictionary has name, values and type keys. | 
Source code in src/ipyvizzu/data/converters/converter.py
              @abstractmethod
def get_series_list(self) -> List[Series]:
    """
    Convert data to a list of dictionaries representing series.

    Abstract method: it has no implementation here beyond this docstring.

    Returns:
        A list of dictionaries representing series,
        where each dictionary has `name`, `values` and `type` keys.
    """
            _convert_to_series_values_and_type(obj)
  
      abstractmethod
  
    Convert object to SeriesValues and InferType.
Source code in src/ipyvizzu/data/converters/converter.py
              @abstractmethod
def _convert_to_series_values_and_type(
    self, obj: Any
) -> Tuple[SeriesValues, InferType]:
    """
    Convert object to SeriesValues and InferType.

    Abstract method: it has no implementation here beyond this docstring.

    Returns:
        A tuple of the series values and the inferred series type.
    """
            _convert_to_measure_values(obj)
  
      abstractmethod
  
    Convert object to a list of MeasureValue.
Source code in src/ipyvizzu/data/converters/converter.py
              @abstractmethod
def _convert_to_measure_values(self, obj: Any) -> List[MeasureValue]:
    """
    Convert object to a list of MeasureValue.

    Abstract method: it has no implementation here beyond this docstring.
    """
            _convert_to_dimension_values(obj)
  
      abstractmethod
  
    Convert object to a list of DimensionValue.
Source code in src/ipyvizzu/data/converters/converter.py
              @abstractmethod
def _convert_to_dimension_values(self, obj: Any) -> List[DimensionValue]:
    """
    Convert object to a list of DimensionValue.

    Abstract method: it has no implementation here beyond this docstring.
    """
            ipyvizzu.data.converters.defaults
    This module provides default values for converters.
            NAN_DIMENSION = ''
  
      module-attribute
  
    Default dimension value to replace nan values.
            NAN_MEASURE = 0
  
      module-attribute
  
    Default measure value to replace nan values.
            ipyvizzu.data.converters.df
    This module provides modules for data frame converter.
            converter
    This module provides the DataFrameConverter abstract class.
            DataFrameConverter
    
              Bases: ToSeriesListConverter
Converts data frame into a list of dictionaries representing series.
Each dictionary contains information about the series name, values and type.
Source code in src/ipyvizzu/data/converters/df/converter.py
                class DataFrameConverter(ToSeriesListConverter):
                    """
                    Abstract converter that turns the columns of a data frame into series.

                    Each series is a dictionary with the `name`, `values` and `type` keys;
                    a `unit` is passed along for the columns that have an entry in `units`.

                    Parameters:
                        default_measure_value: Forwarded to `ToSeriesListConverter`.
                        default_dimension_value: Forwarded to `ToSeriesListConverter`.
                        max_rows: Row count above which `_is_max_rows_exceeded` reports True.
                        units: Optional mapping of column names to units.
                    """

                    # pylint: disable=too-few-public-methods

                    def __init__(
                        self,
                        default_measure_value: MeasureValue,
                        default_dimension_value: DimensionValue,
                        max_rows: int,
                        units: Optional[Dict[str, str]] = None,
                    ) -> None:
                        super().__init__(default_measure_value, default_dimension_value)
                        self._max_rows = max_rows
                        # A missing (or empty) mapping is normalised to an empty dict.
                        self._units = units or {}

                    def get_series_list(self) -> List[Series]:
                        """
                        Convert the `DataFrame` columns to a list of dictionaries representing series.

                        Returns:
                            A list of dictionaries representing series,
                            where each dictionary has `name`, `values` and `type` keys.
                        """
                        return [
                            self._get_series_from_column(column)
                            for column in self._get_columns()
                        ]

                    def _get_series_from_column(self, column_name: str) -> Series:
                        """
                        Build the series dictionary of a single column.
                        """
                        values, infer_type = self._convert_to_series_values_and_type(column_name)
                        return self._convert_to_series(
                            column_name, values, infer_type, self._units.get(column_name)
                        )

                    def _is_max_rows_exceeded(self, row_number: int) -> bool:
                        """
                        Tell whether `row_number` is above `max_rows`; warn the user if it is.
                        """
                        if row_number <= self._max_rows:
                            return False
                        warnings.warn(
                            "The number of rows of the dataframe exceeds the set `max_rows`, "
                            f"the dataframe is randomly sampled to the set value ({self._max_rows}).",
                            UserWarning,
                            stacklevel=2,
                        )
                        return True

                    @abstractmethod
                    def _get_sampled_df(self, df: DataFrame) -> DataFrame:
                        """
                        Returns a sampled data frame for the maximum number of rows.
                        """

                    @abstractmethod
                    def _get_columns(self) -> List[str]:
                        """
                        Return column names of the data frame.
                        """
            get_series_list()
    Convert the DataFrame columns to a list of dictionaries representing series.
Returns:
| Type | Description | 
|---|---|
| List[Series] | A list of dictionaries representing series, | 
| List[Series] | where each dictionary has name, values and type keys. | 
Source code in src/ipyvizzu/data/converters/df/converter.py
              def get_series_list(self) -> List[Series]:
                  """
                  Convert the `DataFrame` columns to a list of dictionaries representing series.

                  One series is produced per column name returned by `_get_columns`,
                  in the same order.

                  Returns:
                      A list of dictionaries representing series,
                      where each dictionary has `name`, `values` and `type` keys.
                  """
                  return [
                      self._get_series_from_column(column)
                      for column in self._get_columns()
                  ]
            _get_sampled_df(df)
  
      abstractmethod
  
    Returns a sampled data frame for the maximum number of rows.
Source code in src/ipyvizzu/data/converters/df/converter.py
              @abstractmethod
def _get_sampled_df(self, df: DataFrame) -> DataFrame:
    """
    Returns a sampled data frame for the maximum number of rows.

    Abstract method: it has no implementation here beyond this docstring.
    """
            _get_columns()
  
      abstractmethod
  
    Return column names of the data frame.
Source code in src/ipyvizzu/data/converters/df/converter.py
              @abstractmethod
def _get_columns(self) -> List[str]:
    """
    Return column names of the data frame.

    Abstract method: it has no implementation here beyond this docstring.
    """
            defaults
    This module provides default values for data frame converter.
            MAX_ROWS = 100000
  
      module-attribute
  
    Default maximum number of rows.
            type_alias
    This module provides typing aliases for data frame converter.
            DataFrame = TypeVar('DataFrame', Any, Any)
  
      module-attribute
  
    Represents a data frame.
            ipyvizzu.data.converters.numpy
    This module provides modules for numpy converter.
            converter
    This module provides the NumpyArrayConverter class,
which allows converting a numpy array
into a list of dictionaries representing series.
            NumpyArrayConverter
    
              Bases: ToSeriesListConverter
Converts a numpy array into a list of dictionaries representing series.
Each dictionary contains information about the series name, values and type.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| np_array | array | The numpy array to convert. | required | 
| column_name | Optional[ColumnName] | The name of a column. By default, uses column indices. Can be set with an Index:Name pair or, for single-dimensional arrays, with just the Name. | None | 
| column_dtype | Optional[ColumnDtype] | The dtype of a column. By default, uses the np_array's dtype. Can be set with an Index:DType pair or, for single-dimensional arrays, with just the DType. | None | 
| default_measure_value | MeasureValue | Default value to use for missing measure values. Defaults to 0. | NAN_MEASURE | 
| default_dimension_value | DimensionValue | Default value to use for missing dimension values. Defaults to an empty string. | NAN_DIMENSION | 
Example
Get series list from numpy array:
converter = NumpyArrayConverter(np_array)
series_list = converter.get_series_list()
Source code in src/ipyvizzu/data/converters/numpy/converter.py
                class NumpyArrayConverter(ToSeriesListConverter):
                    """
                    Converts a `numpy` `array` into a list of dictionaries representing series.
                    Each dictionary contains information about the series `name`, `values` and `type`.

                    Parameters:
                        np_array: The `numpy` `array` to convert.
                        column_name:
                            The name of a column. By default, uses column indices. Can be set with an
                            Index:Name pair or, for single-dimensional arrays, with just the Name.
                        column_dtype:
                            The dtype of a column. By default, uses the np_array's dtype. Can be set
                            with an Index:DType pair or, for single-dimensional arrays, with just the DType.
                        column_unit:
                            The unit of a column. By default, no unit is set. Can be set with an
                            Index:Unit pair or, for single-dimensional arrays, with just the Unit.
                        default_measure_value:
                            Default value to use for missing measure values. Defaults to 0.
                        default_dimension_value:
                            Default value to use for missing dimension values. Defaults to an empty string.

                    Example:
                        Get series list from `numpy` `array`:

                            converter = NumpyArrayConverter(np_array)
                            series_list = converter.get_series_list()
                    """

                    # pylint: disable=too-few-public-methods

                    def __init__(
                        self,
                        np_array: "numpy.array",  # type: ignore
                        column_name: Optional[ColumnName] = None,
                        column_dtype: Optional[ColumnDtype] = None,
                        column_unit: Optional[ColumnUnit] = None,
                        default_measure_value: MeasureValue = NAN_MEASURE,
                        default_dimension_value: DimensionValue = NAN_DIMENSION,
                    ) -> None:
                        # pylint: disable=too-many-arguments,too-many-positional-arguments
                        super().__init__(default_measure_value, default_dimension_value)
                        self._np = self._get_numpy()
                        # The array must be stored first: `_get_columns_config` reads its `ndim`.
                        self._np_array = np_array
                        self._column_name: Dict[Index, Name] = self._get_columns_config(column_name)
                        self._column_dtype: Dict[Index, DType] = self._get_columns_config(column_dtype)
                        self._column_unit: Dict[Index, Unit] = self._get_columns_config(column_unit)

                    def get_series_list(self) -> List[Series]:
                        """
                        Convert the `numpy` `array` to a list of dictionaries representing series.

                        Returns:
                            A list of dictionaries representing series,
                            where each dictionary has `name`, `values` and `type` keys.
                            A 0-dimensional array yields an empty list.

                        Raises:
                            ValueError: If the array has more than two dimensions.
                        """
                        ndim = self._np_array.ndim
                        if ndim == 0:
                            return []
                        if ndim == 1:
                            return self._get_series_list_from_array1dim()
                        if ndim == 2:
                            return self._get_series_list_from_array2dim()
                        raise ValueError("arrays larger than 2D are not supported")

                    def _get_series_list_from_array1dim(self) -> List[Series]:
                        """
                        Return the single series of a 1D array (the whole array is column 0).
                        """
                        return [self._get_series_from_column(0, self._np_array)]

                    def _get_series_list_from_array2dim(self) -> List[Series]:
                        """
                        Return one series per column (second axis) of a 2D array.
                        """
                        return [
                            self._get_series_from_column(i, self._np_array[:, i])
                            for i in range(self._np_array.shape[1])
                        ]

                    def _get_series_from_column(
                        self, index: int, array: "numpy.array"  # type: ignore
                    ) -> Series:
                        """
                        Build the series of the column at `index` from its values in `array`.
                        """
                        # The column index itself is the fallback name.
                        name = self._column_name.get(index, index)
                        unit = self._column_unit.get(index)
                        values, infer_type = self._convert_to_series_values_and_type(
                            (index, array)
                        )
                        return self._convert_to_series(name, values, infer_type, unit)

                    def _get_numpy(self) -> ModuleType:
                        """
                        Import `numpy` lazily and return the module.

                        Raises:
                            ImportError: If `numpy` cannot be imported.
                        """
                        try:
                            import numpy as np  # pylint: disable=import-outside-toplevel

                            return np
                        except ImportError as error:
                            raise ImportError(
                                "numpy is not available. Please install numpy to use this feature."
                            ) from error

                    def _get_columns_config(
                        self,
                        config: Optional[Union[ColumnConfig, Dict[Index, ColumnConfig]]],
                    ) -> Dict[Index, ColumnConfig]:
                        """
                        Normalise a column config to an Index:value dictionary.

                        `None` becomes an empty dictionary, a dictionary is returned as is and
                        a bare value is mapped to column 0.

                        Raises:
                            ValueError: If a bare value is given for an array that is not 1D.
                        """
                        if config is None:
                            return {}
                        if isinstance(config, dict):
                            return config
                        if self._np_array.ndim != 1:
                            raise ValueError("non dict value can only be used for a 1D array")
                        return {0: config}

                    def _convert_to_series_values_and_type(
                        self, obj: Tuple[int, "numpy.array"]  # type: ignore
                    ) -> Tuple[SeriesValues, InferType]:
                        """
                        Convert an (index, column array) pair to series values and type.

                        The configured dtype of the column (or the dtype of the whole array when
                        none is configured) decides the type: number dtypes become measures,
                        everything else becomes a dimension.
                        """
                        index, array = obj
                        dtype = self._column_dtype.get(index, self._np_array.dtype)
                        if self._np.issubdtype(dtype, self._np.number):
                            return self._convert_to_measure_values(array), InferType.MEASURE
                        return self._convert_to_dimension_values(array), InferType.DIMENSION

                    def _convert_to_measure_values(
                        self, obj: "numpy.array"  # type: ignore
                    ) -> List[MeasureValue]:
                        """
                        Convert a column to floats, replacing nan with the default measure value.
                        """
                        array_float = obj.astype(float)
                        return self._np.nan_to_num(
                            array_float, nan=self._default_measure_value
                        ).tolist()

                    def _convert_to_dimension_values(
                        self, obj: "numpy.array"  # type: ignore
                    ) -> List[DimensionValue]:
                        """
                        Convert a column to strings, replacing "nan" with the default dimension value.
                        """
                        array_str = obj.astype(str)
                        # `astype(str)` yields a fixed-width unicode array, so assigning the default
                        # in place would silently cut it to that width. `where` builds a new array
                        # whose item size fits both the values and the default.
                        return self._np.where(
                            array_str == "nan", str(self._default_dimension_value), array_str
                        ).tolist()
            get_series_list()
    Convert the numpy array to a list of dictionaries representing series.
Returns:
| Type | Description | 
|---|---|
| List[Series] | A list of dictionaries representing series, | 
| List[Series] | where each dictionary has name, values and type keys. | 
Source code in src/ipyvizzu/data/converters/numpy/converter.py
              def get_series_list(self) -> List[Series]:
                  """
                  Convert the `numpy` `array` to a list of dictionaries representing series.

                  Returns:
                      A list of dictionaries representing series,
                      where each dictionary has `name`, `values` and `type` keys.
                      A 0-dimensional array yields an empty list.

                  Raises:
                      ValueError: If the array has more than two dimensions.
                  """
                  dimensions = self._np_array.ndim
                  if dimensions == 1:
                      return self._get_series_list_from_array1dim()
                  if dimensions == 2:
                      return self._get_series_list_from_array2dim()
                  if dimensions == 0:
                      return []
                  raise ValueError("arrays larger than 2D are not supported")
            type_alias
    This module provides typing aliases for numpy converter.
            Index = int
  
      module-attribute
  
    Represents the index of a column.
            Name = str
  
      module-attribute
  
    Represents the name of a column.
            DType = type
  
      module-attribute
  
    Represents the dtype of a column.
            Unit = str
  
      module-attribute
  
    Represents the unit of a column.
            ColumnName = Union[Name, Dict[Index, Name]]
  
      module-attribute
  
    Represents a column name. It is a dictionary of Index:Name pairs or for single-dimensional arrays, it can be just a Name.
            ColumnDtype = Union[DType, Dict[Index, DType]]
  
      module-attribute
  
    Represents a column dtype. It is a dictionary of Index:DType pairs or for single-dimensional arrays, it can be just a DType.
            ColumnUnit = Union[Unit, Dict[Index, Unit]]
  
      module-attribute
  
    Represents a column unit. It is a dictionary of Index:Unit pairs or for single-dimensional arrays, it can be just a Unit.
            ColumnConfig = TypeVar('ColumnConfig', Name, DType, Unit)
  
      module-attribute
  
    Represents a column config. It can be Name, DType or Unit.
            ipyvizzu.data.converters.pandas
    This module provides modules for pandas converter.
            converter
    This module provides the PandasDataFrameConverter class,
which allows converting a pandas DataFrame or Series
into a list of dictionaries representing series.
            PandasDataFrameConverter
    
              Bases: DataFrameConverter
Converts a pandas DataFrame or Series into a list of dictionaries representing series.
Each dictionary contains information about the series name, values and type.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| df | Union[DataFrame, Series] | The pandas DataFrame or Series to convert. | required | 
| default_measure_value | MeasureValue | Default value to use for missing measure values. Defaults to 0. | NAN_MEASURE | 
| default_dimension_value | DimensionValue | Default value to use for missing dimension values. Defaults to an empty string. | NAN_DIMENSION | 
| max_rows | int | The maximum number of rows to include in the converted series list. If the df contains more rows, a random sample of the given number of rows will be taken. | MAX_ROWS | 
| include_index | Optional[str] | Name for the index column to include as a series. If provided, the index column will be added. Defaults to None. | None | 
Example
Get series list from DataFrame columns:
converter = PandasDataFrameConverter(df)
series_list = converter.get_series_list()
Source code in src/ipyvizzu/data/converters/pandas/converter.py
                class PandasDataFrameConverter(DataFrameConverter):
                    """
                    Converts a `pandas` `DataFrame` or `Series` into a list of dictionaries representing series.
                    Each dictionary contains information about the series `name`, `values` and `type`.

                    Parameters:
                        df: The `pandas` `DataFrame` or `Series` to convert.
                        default_measure_value:
                            Default value to use for missing measure values. Defaults to 0.
                        default_dimension_value:
                            Default value to use for missing dimension values. Defaults to an empty string.
                        max_rows: The maximum number of rows to include in the converted series list.
                            If the `df` contains more rows,
                            a random sample of the given number of rows will be taken.
                        include_index:
                            Name for the index column to include as a series.
                            If provided, the index column will be added. Defaults to None.
                        units:
                            Optional mapping of column names to units. Defaults to None.

                    Example:
                        Get series list from `DataFrame` columns:

                            converter = PandasDataFrameConverter(df)
                            series_list = converter.get_series_list()
                    """

                    def __init__(
                        self,
                        df: Union["pandas.DataFrame", "pandas.Series"],  # type: ignore
                        default_measure_value: MeasureValue = NAN_MEASURE,
                        default_dimension_value: DimensionValue = NAN_DIMENSION,
                        max_rows: int = MAX_ROWS,
                        include_index: Optional[str] = None,
                        units: Optional[Dict[str, str]] = None,
                    ) -> None:
                        # pylint: disable=too-many-arguments,too-many-positional-arguments
                        super().__init__(
                            default_measure_value, default_dimension_value, max_rows, units
                        )
                        self._pd = self._get_pandas()
                        # A Series is wrapped into a one-column DataFrame before sampling.
                        self._df = self._get_sampled_df(
                            self._convert_to_df(df) if isinstance(df, PandasSeries) else df
                        )
                        self._include_index = include_index

                    def get_series_list(self) -> List[Series]:
                        """
                        Convert the `DataFrame` columns to a list of dictionaries representing series.

                        The index series, if requested with `include_index`, comes first.

                        Returns:
                            A list of dictionaries representing series,
                            where each dictionary has `name`, `values` and `type` keys.
                        """
                        series_list = super().get_series_list()
                        index_series = self.get_series_from_index()
                        return index_series + series_list

                    def get_series_from_index(self) -> List[Series]:
                        """
                        Convert the `DataFrame` index to a series, if `include_index` is provided.

                        Returns:
                            A list holding the index series with `name`, `values` and `type` keys.
                            Returns an empty list if `include_index` is not provided
                            or the index is empty.
                        """
                        if not self._include_index or self._df.index.empty:
                            return []
                        df = self._pd.DataFrame({self._include_index: self._df.index})
                        # Reuse this converter's `max_rows`: the frame is already within that
                        # limit, so the index is never sampled again and keeps the same length
                        # as the column series.
                        index_series_converter = PandasDataFrameConverter(
                            df,
                            self._default_measure_value,
                            self._default_dimension_value,
                            self._max_rows,
                        )
                        return index_series_converter.get_series_list()

                    def _get_pandas(self) -> ModuleType:
                        """
                        Import `pandas` lazily and return the module.

                        Raises:
                            ImportError: If `pandas` cannot be imported.
                        """
                        try:
                            import pandas as pd  # pylint: disable=import-outside-toplevel

                            return pd
                        except ImportError as error:
                            raise ImportError(
                                "pandas is not available. Please install pandas to use this feature."
                            ) from error

                    def _convert_to_df(self, series: "pandas.Series") -> "pandas.DataFrame":  # type: ignore
                        """
                        Wrap a `Series` into a `DataFrame`; an empty `Series` gives an empty `DataFrame`.
                        """
                        if series.empty:
                            return self._pd.DataFrame()
                        return self._pd.DataFrame(series)

                    def _get_sampled_df(self, df: "pandas.DataFrame") -> "pandas.DataFrame":  # type: ignore
                        """
                        Return `df`, randomly sampled when it has more rows than `max_rows`.

                        The sample is drawn without replacement and with a fixed `random_state`.
                        """
                        row_number = len(df)
                        if not self._is_max_rows_exceeded(row_number):
                            return df
                        return df.sample(
                            replace=False,
                            frac=self._max_rows / row_number,
                            random_state=42,
                        )

                    def _get_columns(self) -> List[str]:
                        """
                        Return the column labels of the data frame as a list.
                        """
                        return list(self._df.columns)

                    def _convert_to_series_values_and_type(
                        self, obj: str  # type: ignore
                    ) -> Tuple[SeriesValues, InferType]:
                        """
                        Convert the column named `obj` to series values and type.

                        Columns with a numeric dtype become measures, all others dimensions.
                        """
                        column = self._df[obj]
                        if self._pd.api.types.is_numeric_dtype(column.dtype):
                            return self._convert_to_measure_values(column), InferType.MEASURE
                        return self._convert_to_dimension_values(column), InferType.DIMENSION

                    def _convert_to_measure_values(
                        self, obj: "pandas.Series"  # type: ignore
                    ) -> List[MeasureValue]:
                        """
                        Fill missing values with the default measure value and convert to floats.
                        """
                        return obj.fillna(self._default_measure_value).astype(float).values.tolist()

                    def _convert_to_dimension_values(
                        self, obj: "pandas.Series"  # type: ignore
                    ) -> List[DimensionValue]:
                        """
                        Fill missing values with the default dimension value and convert to strings.
                        """
                        return obj.fillna(self._default_dimension_value).astype(str).values.tolist()
            get_series_list()
    Convert the DataFrame columns to a list of dictionaries representing series.
Returns:
| Type | Description | 
|---|---|
| List[Series] | A list of dictionaries representing series, | 
| List[Series] | where each dictionary has name, values and type keys. | 
Source code in src/ipyvizzu/data/converters/pandas/converter.py
              def get_series_list(self) -> List[Series]:
                  """
                  Convert the `DataFrame` columns to a list of dictionaries representing series.

                  The result of `get_series_from_index` is placed before the column series.

                  Returns:
                      A list of dictionaries representing series,
                      where each dictionary has `name`, `values` and `type` keys.
                  """
                  column_series = super().get_series_list()
                  return self.get_series_from_index() + column_series
            get_series_from_index()
    Convert the DataFrame index to a dictionary representing a series,
if include_index is provided.
Returns:
| Type | Description | 
|---|---|
| List[Series] | A list holding the index series with name, values and type keys. | 
| List[Series] | Returns an empty list if include_index is not provided or the index is empty. | 
Source code in src/ipyvizzu/data/converters/pandas/converter.py
              def get_series_from_index(self) -> List[Series]:
                  """
                  Convert the `DataFrame` index to a series, if `include_index` is provided.

                  Returns:
                      A list holding the index series with `name`, `values` and `type` keys.
                      Returns an empty list if `include_index` is not provided
                      or the index is empty.
                  """
                  if not self._include_index or self._df.index.empty:
                      return []
                  df = self._pd.DataFrame({self._include_index: self._df.index})
                  # Reuse this converter's `max_rows`: the frame is already within that
                  # limit, so the index is never sampled again and keeps the same length
                  # as the column series.
                  index_series_converter = PandasDataFrameConverter(
                      df,
                      self._default_measure_value,
                      self._default_dimension_value,
                      self._max_rows,
                  )
                  return index_series_converter.get_series_list()
            protocol
    This module provides protocol classes for pandas data frame converter.
            PandasDataFrame
    
              Bases: Protocol
Represents a pandas DataFrame Protocol.
Source code in src/ipyvizzu/data/converters/pandas/protocol.py
                @runtime_checkable
class PandasDataFrame(Protocol):
    """
    Represents a pandas DataFrame Protocol.

    Structural type declaring the `index`, `columns`, `sample`, `__len__`
    and `__getitem__` members. `runtime_checkable` allows the protocol
    to be used with `isinstance`.
    """
    # pylint: disable=too-few-public-methods
    # Only the member declarations below make up the protocol; there is no behavior.
    index: Any
    columns: Sequence[str]
    sample: Callable[..., Any]
    __len__: Callable[[], int]
    __getitem__: Callable[[Any], Any]
            PandasSeries
    
              Bases: Protocol
Represents a pandas Series Protocol.
Source code in src/ipyvizzu/data/converters/pandas/protocol.py
                @runtime_checkable
class PandasSeries(Protocol):
    """
    Represents a pandas Series Protocol.

    Structural type declaring the `index`, `values`, `dtype`, `__len__`
    and `__getitem__` members. `runtime_checkable` allows the protocol
    to be used with `isinstance`.
    """
    # pylint: disable=too-few-public-methods
    # Only the member declarations below make up the protocol; there is no behavior.
    index: Any
    values: Any
    dtype: Any
    __len__: Callable[[], int]
    __getitem__: Callable[[Any], Any]
            ipyvizzu.data.converters.spark
    This module provides modules for pyspark converter.
            converter
    This module provides the SparkDataFrameConverter class,
which allows converting a pyspark DataFrame
into a list of dictionaries representing series.
            SparkDataFrameConverter
    
              Bases: DataFrameConverter
Converts a pyspark DataFrame into a list of dictionaries representing series.
Each dictionary contains information about the series name, values and type.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
| df | DataFrame | The pyspark DataFrame to convert. | required | 
| default_measure_value | MeasureValue | Default value to use for missing measure values. Defaults to 0. | NAN_MEASURE | 
| default_dimension_value | DimensionValue | Default value to use for missing dimension values. Defaults to an empty string. | NAN_DIMENSION | 
| max_rows | int | The maximum number of rows to include in the converted series list. If the df contains more rows, a random sample of the given number of rows (approximately) will be taken. | MAX_ROWS | 
Example
Get series list from DataFrame columns:
converter = SparkDataFrameConverter(df)
series_list = converter.get_series_list()
Source code in src/ipyvizzu/data/converters/spark/converter.py
                class SparkDataFrameConverter(DataFrameConverter):
                    """
                    Converts a `pyspark` `DataFrame` into a list of dictionaries representing series.
                    Each dictionary contains information about the series `name`, `values` and `type`.

                    Parameters:
                        df: The `pyspark` `DataFrame` to convert.
                        default_measure_value:
                            Default value to use for missing measure values. Defaults to 0.
                        default_dimension_value:
                            Default value to use for missing dimension values. Defaults to an empty string.
                        max_rows: The maximum number of rows to include in the converted series list.
                            If the `df` contains more rows,
                            a random sample of the given number of rows (approximately) will be taken.
                        units:
                            Optional mapping of column names to units,
                            forwarded to `DataFrameConverter`. Defaults to None.

                    Example:
                        Get series list from `DataFrame` columns:

                            converter = SparkDataFrameConverter(df)
                            series_list = converter.get_series_list()
                    """

                    # pylint: disable=too-few-public-methods

                    def __init__(
                        self,
                        df: "pyspark.sql.DataFrame",  # type: ignore
                        default_measure_value: MeasureValue = NAN_MEASURE,
                        default_dimension_value: DimensionValue = NAN_DIMENSION,
                        max_rows: int = MAX_ROWS,
                        units: Optional[Dict[str, str]] = None,
                    ) -> None:
                        # pylint: disable=too-many-arguments,too-many-positional-arguments
                        super().__init__(
                            default_measure_value, default_dimension_value, max_rows, units
                        )
                        self._pyspark, self._pyspark_func = self._get_pyspark()
                        self._df = self._get_sampled_df(df)

                    def _get_pyspark(self) -> Tuple[ModuleType, ModuleType]:
                        """
                        Import `pyspark` and `pyspark.sql.functions` lazily and return both modules.

                        Raises:
                            ImportError: If `pyspark` cannot be imported.
                        """
                        try:
                            import pyspark  # pylint: disable=import-outside-toplevel
                            from pyspark.sql import functions  # pylint: disable=import-outside-toplevel

                            return pyspark, functions
                        except ImportError as error:
                            raise ImportError(
                                "pyspark is not available. Please install pyspark to use this feature."
                            ) from error

                    def _get_sampled_df(
                        self, df: "pyspark.sql.DataFrame"  # type: ignore
                    ) -> "pyspark.sql.DataFrame":  # type: ignore
                        """
                        Return `df`, randomly sampled when it has more rows than `max_rows`.

                        The sample is drawn without replacement and with a fixed seed,
                        then limited to `max_rows` rows.
                        """
                        row_number = df.count()
                        if not self._is_max_rows_exceeded(row_number):
                            return df
                        sampled = df.sample(
                            withReplacement=False,
                            fraction=self._max_rows / row_number,
                            seed=42,
                        )
                        return sampled.limit(self._max_rows)

                    def _get_columns(self) -> List[str]:
                        """
                        Return the `columns` attribute of the data frame.
                        """
                        return self._df.columns

                    def _convert_to_series_values_and_type(
                        self, obj: str
                    ) -> Tuple[SeriesValues, InferType]:
                        """
                        Convert the column named `obj` to series values and type.

                        Only `IntegerType` and `DoubleType` columns become measures;
                        every other column becomes a dimension.
                        """
                        data_type = self._df.select(obj).schema[obj].dataType
                        sql_types = self._pyspark.sql.types
                        measure_types = (sql_types.IntegerType, sql_types.DoubleType)
                        if isinstance(data_type, measure_types):
                            return self._convert_to_measure_values(obj), InferType.MEASURE
                        return self._convert_to_dimension_values(obj), InferType.DIMENSION

                    def _convert_to_measure_values(self, obj: str) -> List[MeasureValue]:
                        """
                        Collect the column named `obj` with nulls replaced by the default
                        measure value and the values cast to "float".
                        """
                        return self._collect_column(obj, self._default_measure_value, "float")

                    def _convert_to_dimension_values(self, obj: str) -> List[DimensionValue]:
                        """
                        Collect the column named `obj` with nulls replaced by the default
                        dimension value and the values cast to "string".
                        """
                        return self._collect_column(obj, self._default_dimension_value, "string")

                    def _collect_column(
                        self, column_name: str, default_value: object, cast_type: str
                    ) -> list:
                        """
                        Replace nulls in a column, cast it to `cast_type` and collect its values.
                        """
                        func = self._pyspark_func
                        filled_df = self._df.withColumn(
                            column_name,
                            func.when(func.col(column_name).isNull(), default_value).otherwise(
                                func.col(column_name)
                            ),
                        )
                        cast_df = filled_df.withColumn(
                            column_name, func.col(column_name).cast(cast_type)
                        )
                        # Each collected row holds the single selected column; flatten the rows.
                        return cast_df.select(column_name).rdd.flatMap(list).collect()
            protocol
    This module provides protocol classes for pyspark data frame converter.
            SparkDataFrame
    
              Bases: Protocol
Represents a pyspark DataFrame Protocol.
Source code in src/ipyvizzu/data/converters/spark/protocol.py
                @runtime_checkable
class SparkDataFrame(Protocol):
    """
    Represents a pyspark DataFrame Protocol.

    Structural type declaring the `columns`, `count`, `sample`, `limit`,
    `select`, `withColumn` and `rdd` members. `runtime_checkable` allows
    the protocol to be used with `isinstance`.
    """
    # pylint: disable=too-few-public-methods
    # Only the member declarations below make up the protocol; there is no behavior.
    columns: Sequence[str]
    count: Callable[..., int]
    sample: Callable[..., Any]
    limit: Callable[..., Any]
    select: Callable[..., Any]
    withColumn: Callable[..., Any]
    rdd: Any