causal_hub.datasets#

Submodule datasets.

class causal_hub.datasets.Dataset#

Bases: object

A tabular dataset.

Categorical#

alias of PyDataset_Categorical

CategoricalIncomplete#

alias of PyDataset_CategoricalIncomplete

Gaussian#

alias of PyDataset_Gaussian

GaussianIncomplete#

alias of PyDataset_GaussianIncomplete

class causal_hub.datasets.CatTable#

Bases: object

A categorical tabular dataset.

classmethod from_pandas(df)#

Constructs a new categorical tabular dataset from a Pandas DataFrame.

Parameters:

df (pandas.DataFrame) – A Pandas DataFrame containing only categorical columns.

Returns:

A new categorical tabular dataset instance.

Return type:

CatTable

labels()#

The labels of the dataset.

Returns:

A list of strings containing the labels of the dataset.

Return type:

list[str]

sample_size()#

The sample size.

Returns:

The number of samples in the dataset. If the dataset is weighted, this returns the sum of the weights.

Return type:

float

states()#

Returns the states of the dataset.

Returns:

A dictionary mapping each label to a tuple of its possible states.

Return type:

dict[str, tuple[str, …]]

to_pandas()#

Converts the dataset to a Pandas DataFrame.

Returns:

A Pandas DataFrame.

Return type:

pandas.DataFrame

values()#

The values of the dataset.

Returns:

A 2D NumPy array containing the values of the dataset.

Return type:

numpy.ndarray

class causal_hub.datasets.CatIncTable#

Bases: object

A categorical incomplete tabular dataset.

classmethod from_pandas(df)#

Constructs a new categorical incomplete tabular dataset from a Pandas DataFrame.

Parameters:

df (pandas.DataFrame) – A Pandas DataFrame containing categorical columns with missing values.

Returns:

A new categorical incomplete tabular dataset instance.

Return type:

CatIncTable

labels()#

The labels of the dataset.

Returns:

A list of strings containing the labels of the dataset.

Return type:

list[str]

missing()#

The missing information table.

Returns:

A missing information table instance.

Return type:

MissingTable

sample_size()#

The sample size.

Returns:

The number of samples in the dataset. If the dataset is weighted, this returns the sum of the weights.

Return type:

float

states()#

Returns the states of the dataset.

Returns:

A dictionary mapping each label to a tuple of its possible states.

Return type:

dict[str, tuple[str, …]]

to_pandas()#

Converts the dataset to a Pandas DataFrame.

Returns:

A Pandas DataFrame.

Return type:

pandas.DataFrame

values()#

The values of the dataset.

Returns:

A 2D NumPy array containing the values of the dataset.

Return type:

numpy.ndarray

class causal_hub.datasets.GaussTable#

Bases: object

A Gaussian tabular dataset.

classmethod from_pandas(df)#

Constructs a new Gaussian tabular dataset from a Pandas DataFrame.

Parameters:

df (pandas.DataFrame) – A Pandas DataFrame containing only float64 columns.

Returns:

A new Gaussian tabular dataset instance.

Return type:

GaussTable

labels()#

The labels of the dataset.

Returns:

A list of strings containing the labels of the dataset.

Return type:

list[str]

sample_size()#

The sample size.

Returns:

The number of samples in the dataset. If the dataset is weighted, this returns the sum of the weights.

Return type:

float

to_pandas()#

Converts the dataset to a Pandas DataFrame.

Returns:

A Pandas DataFrame.

Return type:

pandas.DataFrame

values()#

The values of the dataset.

Returns:

A 2D NumPy array containing the values of the dataset.

Return type:

numpy.ndarray

class causal_hub.datasets.GaussIncTable(labels, values)#

Bases: object

A Gaussian incomplete tabular dataset.

classmethod __new__(*args, **kwargs)#

classmethod from_pandas(df)#

Constructs a new Gaussian incomplete tabular dataset from a Pandas DataFrame.

Parameters:

df (pandas.DataFrame) – A Pandas DataFrame containing Gaussian columns with missing values.

Returns:

A new Gaussian incomplete tabular dataset instance.

Return type:

GaussIncTable

labels()#

The labels of the dataset.

Returns:

A list of strings containing the labels of the dataset.

Return type:

list[str]

missing()#

The missing information of the dataset.

Returns:

The missing information of the dataset.

Return type:

MissingTable

sample_size()#

The number of samples in the dataset.

Returns:

The number of samples in the dataset.

Return type:

int

to_pandas()#

Converts the dataset to a Pandas DataFrame.

Returns:

A Pandas DataFrame.

Return type:

pandas.DataFrame

values()#

The values of the dataset.

Returns:

A 2D NumPy array containing the values of the dataset.

Return type:

numpy.ndarray

class causal_hub.datasets.MissingTable(labels, mask)#

Bases: object

A struct for missing information in a tabular dataset.

classmethod __new__(*args, **kwargs)#

complete_cols_count()#

The complete columns count.

Returns:

The number of complete columns in the dataset.

Return type:

int

complete_rows_count()#

The complete rows count.

Returns:

The number of complete rows in the dataset.

Return type:

int

fully_observed()#

The fully observed variable sets.

Returns:

The set of fully observed variables.

Return type:

set[int]

labels()#

The labels of the dataset.

Returns:

A list of strings containing the labels of the dataset.

Return type:

list[str]

missing_correlation()#

The missing correlation of the dataset.

Returns:

A 2D NumPy array containing the missing correlation of the dataset.

Return type:

numpy.ndarray

missing_count()#

The missing count of the dataset.

Returns:

The number of missing values in the dataset.

Return type:

int

missing_count_by_cols()#

The missing count by columns.

Returns:

A 1D NumPy array containing the missing count by columns.

Return type:

numpy.ndarray

missing_count_by_rows()#

The missing count by rows.

Returns:

A 1D NumPy array containing the missing count by rows.

Return type:

numpy.ndarray

missing_covariance()#

The missing covariance of the dataset.

Returns:

A 2D NumPy array containing the missing covariance of the dataset.

Return type:

numpy.ndarray

missing_mask()#

The missing mask of the dataset.

Returns:

A 2D NumPy array containing the missing mask of the dataset.

Return type:

numpy.ndarray

missing_mask_by_cols()#

The missing mask by columns.

Returns:

A 1D NumPy array containing the missing mask by columns.

Return type:

numpy.ndarray

missing_mask_by_rows()#

The missing mask by rows.

Returns:

A 1D NumPy array containing the missing mask by rows.

Return type:

numpy.ndarray

missing_rate()#

The missing rate of the dataset.

Returns:

The missing rate of the dataset.

Return type:

float

missing_rate_by_cols()#

The missing rate by columns.

Returns:

A 1D NumPy array containing the missing rate by columns.

Return type:

numpy.ndarray

missing_rate_by_rows()#

The missing rate by rows.

Returns:

A 1D NumPy array containing the missing rate by rows.

Return type:

numpy.ndarray

partially_observed()#

The partially observed variable sets.

Returns:

The set of partially observed variables.

Return type:

set[int]

class causal_hub.datasets.CatTrj#

Bases: object

A categorical trajectory.

classmethod from_pandas(df)#

Constructs a new categorical trajectory from a Pandas DataFrame.

Parameters:

df (pandas.DataFrame) – A Pandas DataFrame containing the trajectory data. The data frame must contain a column named “time” that represents the time of each event. Every other column in the data frame must represent a categorical variable.

Returns:

A new categorical trajectory instance.

Return type:

CatTrj

labels()#

Returns the labels of the categorical trajectory.

Returns:

A reference to the labels of the categorical trajectory.

Return type:

list[str]

states()#

Returns the states of the categorical trajectory.

Returns:

A reference to the states of the categorical trajectory.

Return type:

dict[str, tuple[str, …]]

times()#

Returns the times of the trajectory.

Returns:

A reference to the times of the trajectory.

Return type:

numpy.ndarray

to_pandas()#

Converts the categorical trajectory to a Pandas DataFrame.

Returns:

A Pandas DataFrame representation of the categorical trajectory.

Return type:

pandas.DataFrame

values()#

Returns the values of the trajectory.

Returns:

A reference to the values of the trajectory.

Return type:

numpy.ndarray

class causal_hub.datasets.CatTrjs#

Bases: object

A collection of categorical trajectories.

classmethod from_pandas(dfs)#

Constructs a new collection of categorical trajectories from an iterable of Pandas DataFrames.

Parameters:

dfs (Iterable[pandas.DataFrame]) – An iterable of Pandas DataFrames containing the trajectory data. Each data frame must contain a column named “time” that represents the time of each event. Every other column in the data frame must represent a categorical variable.

Returns:

A new collection of categorical trajectories.

Return type:

CatTrjs

labels()#

Returns the labels of the categorical trajectories.

Returns:

A reference to the labels of the categorical trajectories.

Return type:

list[str]

states()#

Returns the states of the categorical trajectories.

Returns:

A reference to the states of the categorical trajectories.

Return type:

dict[str, tuple[str, …]]

to_pandas()#

Converts the categorical trajectories to a list of Pandas DataFrames.

Returns:

A list of Pandas DataFrame representations of the categorical trajectories.

Return type:

list[pandas.DataFrame]

values()#

Returns the trajectories.

Returns:

A list of categorical trajectories.

Return type:

list[CatTrj]

class causal_hub.datasets.CatTrjEv#

Bases: object

A categorical trajectory evidence.

classmethod from_pandas(df, with_states=None)#

Constructs a new categorical trajectory evidence from a Pandas DataFrame.

Parameters:
  • df (pandas.DataFrame) – A Pandas DataFrame containing the trajectory evidence data. The data frame must contain the following columns:

      ◦ event

      ◦ state

      ◦ start_time

      ◦ end_time

  • with_states (dict[str, Iterable[str]] | None) – An optional dictionary mapping event labels to their possible states. If not provided, the states will be inferred from the data frame.

Returns:

A new categorical trajectory evidence instance.

Return type:

CatTrjEv

labels()#

Returns the labels of the categorical trajectory evidence.

Returns:

A reference to the labels of the categorical trajectory evidence.

Return type:

list[str]

states()#

Returns the states of the categorical trajectory evidence.

Returns:

A reference to the states of the categorical trajectory evidence.

Return type:

dict[str, tuple[str, …]]

class causal_hub.datasets.CatTrjsEv#

Bases: object

A collection of categorical trajectory evidences.

classmethod from_pandas(dfs, with_states=None)#

Constructs a new collection of categorical trajectory evidences from an iterable of Pandas DataFrames.

Parameters:
  • dfs (Iterable[pandas.DataFrame]) – An iterable of Pandas DataFrames containing the trajectory evidence data. Each data frame must contain the following columns:

      ◦ event

      ◦ state

      ◦ start_time

      ◦ end_time

  • with_states (dict[str, Iterable[str]] | None) – An optional dictionary mapping event labels to their possible states. If not provided, the states will be inferred from the data frames.

Returns:

A new collection of categorical trajectory evidences.

Return type:

CatTrjsEv

labels()#

Returns the labels of the categorical trajectory evidences.

Returns:

A reference to the labels of the categorical trajectory evidences.

Return type:

list[str]

states()#

Returns the states of the categorical trajectory evidences.

Returns:

A reference to the states of the categorical trajectory evidences.

Return type:

dict[str, tuple[str, …]]

class causal_hub.datasets.CatWtdTrj#

Bases: object

A categorical trajectory with a weight.

labels()#

Returns the labels of the categorical trajectory.

Returns:

A reference to the labels of the categorical trajectory.

Return type:

list[str]

states()#

Returns the states of the categorical trajectory.

Returns:

A reference to the states of the categorical trajectory.

Return type:

dict[str, tuple[str, …]]

times()#

Returns the times of the trajectory.

Returns:

A reference to the times of the trajectory.

Return type:

numpy.ndarray

trajectory()#

Returns the trajectory.

Returns:

A reference to the trajectory.

Return type:

CatTrj

weight()#

Returns the weight of the trajectory.

Returns:

The weight of the trajectory.

Return type:

float

class causal_hub.datasets.CatWtdTrjs#

Bases: object

A collection of categorical trajectories with weights.

labels()#

Returns the labels of the weighted categorical trajectories.

Returns:

A reference to the labels of the weighted categorical trajectories.

Return type:

list[str]

states()#

Returns the states of the weighted categorical trajectories.

Returns:

A reference to the states of the weighted categorical trajectories.

Return type:

dict[str, tuple[str, …]]

values()#

Returns the trajectories.

Returns:

A list of weighted categorical trajectories.

Return type:

list[CatWtdTrj]