descope.dataset

class BaseDataset(adata: str | AnnData, pert_col: str = 'perturbation', ctrl_name: str = 'control', perts_to_include: list | None = None, perts_to_exclude: list | None = None, gene_embs_file: str = './ESM2_pert_features.pt')[source]

Bases: Dataset, ABC

MAIN_INPUT_NAME = None
RANDOM_MAPPING_CONTROL_TO_CONTROL = False
get_ctrl_cell_indices(adata: AnnData) -> list[int] [source]
abstract preprocess_adata(adata: AnnData) -> AnnData [source]
class DatasetForATAC(adata: str | AnnData, pert_col: str = 'perturbation', ctrl_name: str = 'control', topk_ccres: int = 50000, perts_to_include: list | None = None, perts_to_exclude: list | None = None, gene_embs_file: str = './ESM2_pert_features.pt')[source]

Bases: BaseDataset

MAIN_INPUT_NAME = 'ctrl_cell_tf_idf'
RANDOM_MAPPING_CONTROL_TO_CONTROL = False
preprocess_adata(adata: AnnData) -> AnnData [source]
class DatasetForRNA(adata: str | AnnData, pert_col: str = 'target_gene', ctrl_name: str = 'non-targeting', target_sum: float = 10000.0, skip_raw_counts_check: bool = False, perts_to_include: list | None = None, perts_to_exclude: list | None = None, gene_embs_file: str = './ESM2_pert_features.pt')[source]

Bases: BaseDataset

MAIN_INPUT_NAME = 'ctrl_cell_expr'
RANDOM_MAPPING_CONTROL_TO_CONTROL = False
preprocess_adata(adata: AnnData) -> AnnData [source]
class HFBaseDataset(hf_dataset: Dataset, ctrl_name: str = 'control', gene_embs_file: str = './ESM2_pert_features.pt', mse_weights_pkl_file: str | None = None)[source]

Bases: Dataset

MAIN_INPUT_NAME = None
RANDOM_MAPPING_CONTROL_TO_CONTROL = False
static collate_fn(batch)[source]
get_ctrl_cell_indices_for_each_celltype() -> dict[str, list[int]] [source]
class HFDatasetForATAC(hf_dataset: Dataset, ctrl_name: str = 'control', gene_embs_file: str = './ESM2_pert_features.pt', mse_weights_pkl_file: str | None = None)[source]

Bases: HFBaseDataset

MAIN_INPUT_NAME = 'ctrl_cell_tf_idf'
RANDOM_MAPPING_CONTROL_TO_CONTROL = False
class HFDatasetForRNA(hf_dataset: Dataset, ctrl_name: str = 'control', gene_embs_file: str = './ESM2_pert_features.pt', mse_weights_pkl_file: str | None = None)[source]

Bases: HFBaseDataset

MAIN_INPUT_NAME = 'ctrl_cell_expr'
RANDOM_MAPPING_CONTROL_TO_CONTROL = False