Examples

This section contains a step-by-step tutorial on how to use the fishy-business library programmatically.

Getting Started

This tutorial shows the simplest way to run a training experiment: the high-level run_unified_training interface.

 # -*- coding: utf-8 -*-
"""
Tutorial 01: Getting Started
----------------------------
Run a single, short training experiment end to end through the
high-level `run_unified_training` interface and print its summary.
"""

from pathlib import Path
from fishy._core.config import TrainingConfig
from fishy.experiments.unified_trainer import run_unified_training
from fishy.cli.main import display_final_summary

# Parent of the directory that contains this script. Nothing below reads it;
# it is kept so the module still exposes the same names.
PROJECT_ROOT = Path(__file__).resolve().parent.parent


def _minimal_settings():
    """Return the keyword arguments used to build this tutorial's config."""
    return {
        "model": "transformer",
        "dataset": "species",
        "epochs": 5,
        "batch_size": 32,
        "wandb_log": False,
    }


def main():
    """Build a small config, run one training job, and show the summary."""
    print("--- Tutorial 01: Getting Started ---")

    # Step 1: only five settings are spelled out; everything else is left to
    # whatever defaults TrainingConfig provides.
    cfg = TrainingConfig(**_minimal_settings())

    model_name = cfg.model
    dataset_name = cfg.dataset
    print(f"Launching a {model_name} training on the {dataset_name} dataset...")

    # Step 2: hand the config to the unified entry point.
    outcome = run_unified_training(cfg)

    # Step 3: render whatever came back with the CLI's summary helper.
    display_final_summary(outcome)


if __name__ == "__main__":
    main()

DataModule and Processing

Learn how the DataModule handles data loading, filtering, and conversion into PyTorch-ready tensors.

 # -*- coding: utf-8 -*-
"""
Tutorial 02: DataModule and Data Processing
-------------------------------------------
Create a `DataModule`, set it up, and look at what it exposes: metadata,
the full dataset as NumPy arrays, and a PyTorch training DataLoader.
"""

from pathlib import Path
from fishy.data.module import create_data_module

# Parent of the directory that contains this script (not referenced below).
PROJECT_ROOT = Path(__file__).resolve().parent.parent


def _report_metadata(data_module):
    """Print the input dimension, class count, and class names."""
    print(f"  Input Dimension (features): {data_module.get_input_dim()}")
    print(f"  Number of Classes:          {data_module.get_num_classes()}")
    print(f"  Class Names:                {data_module.get_class_names()}")


def _report_first_batch(data_module):
    """Pull one batch from the training DataLoader and print its shapes."""
    # A batch is unpacked as a (spectra, labels) pair.
    spectra, labels = next(iter(data_module.get_train_dataloader()))

    print("\nFirst PyTorch Batch:")
    print(f"  Spectra tensor shape: {spectra.shape}")
    print(f"  Labels tensor shape:  {labels.shape}")


def main():
    """Walk through the DataModule API on the "species" dataset."""
    print("--- Tutorial 02: DataModule and Data Processing ---")

    # Step 1: build the module. The upstream tutorial notes that dataset
    # names are defined in fishy/configs/datasets.yaml.
    chosen = "species"
    data_module = create_data_module(dataset_name=chosen)

    print(f"Initializing DataModule for: {chosen}")

    # Step 2: setup() is called before any accessor below. Upstream describes
    # it as the point where data is actually loaded and filtered.
    data_module.setup()

    # Step 3: metadata reported by the module itself.
    _report_metadata(data_module)

    # Step 4: the whole dataset as NumPy arrays, labels requested as indices.
    features, targets = data_module.get_numpy_data(labels_as_indices=True)
    print(f"\nNumPy Data Shape: X={features.shape}, y={targets.shape}")

    # Step 5: the training DataLoader, sampled once.
    _report_first_batch(data_module)


if __name__ == "__main__":
    main()

Configuration Management

This tutorial shows how TrainingConfig and ExperimentConfig centralize hyperparameters and experimental settings.

 # -*- coding: utf-8 -*-
"""
Tutorial 03: Configuration Management
-------------------------------------
This tutorial covers the `TrainingConfig` and `ExperimentConfig` classes,
which centralize all hyperparameters and experimental settings.
"""

from pathlib import Path

from fishy._core.config import TrainingConfig, ExperimentConfig


def main():
    """Build a TrainingConfig, round-trip it through YAML, then build an
    ExperimentConfig.

    The YAML file is written to ``example_config.yaml`` in the current working
    directory and is removed again before the function returns, including
    when the save or load step raises.
    """
    print("--- Tutorial 03: Configuration Management ---")

    # 1. Single Run Configuration (TrainingConfig)
    # This class holds everything needed for one training session.
    train_cfg = TrainingConfig(
        model="cnn",
        dataset="part",
        epochs=10,
        learning_rate=5e-4,
        batch_size=16,
        data_augmentation=True,  # Enable built-in augmentation
    )

    print("\nTrainingConfig created:")
    print(f"  Model: {train_cfg.model}")
    print(f"  Augmentation: {train_cfg.data_augmentation}")

    # 2. Saving and Loading YAML
    # Configs can be serialized to disk for reproducibility or CLI use.
    yaml_path = "example_config.yaml"
    try:
        train_cfg.to_yaml(yaml_path)
        print(f"  Config saved to {yaml_path}")

        # Loading it back
        loaded_cfg = TrainingConfig.from_yaml(yaml_path)
        print(f"  Loaded Model: {loaded_cfg.model}")
    finally:
        # Clean up in `finally` so a failing save/load cannot leave the
        # example file behind in the user's working directory.
        try:
            Path(yaml_path).unlink()
        except FileNotFoundError:
            # to_yaml() failed before creating the file; nothing to remove.
            pass

    # 3. Batch Configuration (ExperimentConfig)
    # Used to orchestrate multiple models across multiple datasets.
    exp_cfg = ExperimentConfig(
        name="my_first_batch",
        num_runs=5,  # Run each combination 5 times for statistics
        datasets=["species", "oil"],
        models=["cnn", "transformer", "opls-da"],
        benchmark=True,  # Enable performance measuring for all
        overrides={"epochs": 2},  # Force these settings on all runs
    )

    print("\nExperimentConfig created:")
    print(f"  Batch Name: {exp_cfg.name}")
    print(f"  Total combinations: {len(exp_cfg.datasets) * len(exp_cfg.models)}")


if __name__ == "__main__":
    main()

Training Engines

Exploring different ways to train models, from automated orchestration to direct control over the training loop.

 # -*- coding: utf-8 -*-
"""
Tutorial 04: Training Engines (High vs. Low Level)
--------------------------------------------------
Two ways to train: the one-call `run_unified_training` orchestration, and
the `Trainer` class driven directly with a hand-built model and DataLoader.
"""

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from fishy.engine.trainer import Trainer
from fishy._core.config import TrainingConfig
from fishy.experiments.unified_trainer import run_unified_training
from pathlib import Path

# Parent of the directory that contains this script (not referenced below).
PROJECT_ROOT = Path(__file__).resolve().parent.parent


def _orchestrated_run():
    """Level 1: let `run_unified_training` drive everything from a config."""
    print("\n--- Level 1: run_unified_training ---")
    cfg = TrainingConfig(
        model="transformer",
        dataset="species",
        epochs=2,
        batch_size=32,
        wandb_log=False,
    )
    outcome = run_unified_training(cfg)
    # Falls back to 0 when the result has no 'val_balanced_accuracy' entry.
    print(f"Orchestrated Accuracy: {outcome.get('val_balanced_accuracy', 0):.4f}")


def _manual_trainer_run():
    """Level 2: feed a custom model and random data to `Trainer` directly."""
    print("\n--- Level 2: Custom Trainer Control ---")

    n_samples, n_features, n_hidden, n_classes = 32, 100, 64, 2

    # A small two-layer classifier.
    net = nn.Sequential(
        nn.Linear(n_features, n_hidden),
        nn.ReLU(),
        nn.Linear(n_hidden, n_classes),
    )

    # Random inputs and integer labels, served in batches of 8.
    inputs = torch.randn(n_samples, n_features)
    targets = torch.randint(0, n_classes, (n_samples,))
    batches = DataLoader(TensorDataset(inputs, targets), batch_size=8)

    # Wire the pieces into the Trainer.
    loss_fn = nn.CrossEntropyLoss()
    optimiser = torch.optim.Adam(net.parameters(), lr=1e-3)
    engine = Trainer(
        model=net,
        criterion=loss_fn,
        optimizer=optimiser,
        device=torch.device("cpu"),
        num_epochs=2,
    )

    # The same loader is reused for validation in this toy example.
    report = engine.train(batches, val_loader=batches)
    print(f"Manual Trainer Accuracy: {report['best_accuracy']:.4f}")


def main():
    """Run the orchestrated example first, then the manual Trainer example."""
    print("--- Tutorial 04: Training Engines ---")
    _orchestrated_run()
    _manual_trainer_run()


if __name__ == "__main__":
    main()

Automated Benchmarking

How to trigger the full automated benchmark suite used in research papers.

 # -*- coding: utf-8 -*-
"""
Tutorial 05: Automated Benchmarking (Run All)
---------------------------------------------
Trigger the automated benchmark suite through `run_all_benchmarks` in its
quick mode and print the summary table it returns.
"""

from fishy.experiments.unified_trainer import run_all_benchmarks


def main():
    """Launch the quick benchmark suite and print its summary table."""
    print("--- Tutorial 05: Automated Benchmarking ---")

    # Upstream describes `run_all_benchmarks` as a one-call comparison of the
    # Classic, Deep, and Evolutionary model families, and `quick=True` as a
    # much smaller subset of the full suite (details not visible from here).
    suite_options = {"quick": True, "wandb_log": False}

    print("Launching quick benchmark suite...")
    table = run_all_benchmarks(**suite_options)

    print("\nBenchmark Summary Table:")
    print(table.to_string())


if __name__ == "__main__":
    main()

Self-Supervised Pre-training

Demonstrates how to use unlabeled or semi-labeled data to pre-train a model using various self-supervised tasks.

 # -*- coding: utf-8 -*-
"""
Tutorial 06: Self-Supervised Pre-training
-----------------------------------------
This tutorial demonstrates how to use unlabeled or semi-labeled data
to pre-train a model using various self-supervised tasks.
"""

from pathlib import Path
from fishy._core.config import TrainingConfig
from fishy.experiments.deep_training import ModelTrainer

# Parent of the directory that contains this script (not referenced below).
PROJECT_ROOT = Path(__file__).resolve().parent.parent


def main():
    """Run the pre-training phase of `ModelTrainer` with two tasks enabled.

    Builds a `TrainingConfig` with `masked_spectra_modelling` and
    `spectrum_denoising_autoencoding` switched on, calls
    `ModelTrainer.pre_train()`, and reports whether a model came back.
    """
    print("--- Tutorial 06: Self-Supervised Pre-training ---")

    # 1. Define pre-training tasks in the config
    # Here we enable Masked Spectra Modelling (MSM) and Denoising (SDA).
    config = TrainingConfig(
        model="transformer",
        dataset="species",
        epochs=5,
        masked_spectra_modelling=True,
        spectrum_denoising_autoencoding=True,
        wandb_log=False,
    )

    # 2. Initialize the high-level ModelTrainer
    trainer = ModelTrainer(config)

    # 3. Run the pre-training phase
    # Upstream describes this as running each enabled task in sequence.
    print(f"Starting pre-training tasks for {config.model}...")
    pre_trained_model = trainer.pre_train()

    # Compare against None rather than relying on truthiness: an object's
    # truth value follows __bool__/__len__ when its class defines them, so a
    # valid model whose class defines __len__ could be treated as a failure.
    # NOTE(review): assumes pre_train() signals "no model" with None - confirm.
    if pre_trained_model is not None:
        print("\nPre-training successful! Model is now ready for fine-tuning.")

        # 4. Optional: Proceed to fine-tuning with the learned weights
        # results = trainer.train(pre_trained_model)
        # print(f"Fine-tuned Accuracy: {results.get('balanced_accuracy', 0):.4f}")
    else:
        # Previously this path was silent, which hid a failed pre-training.
        print("\nPre-training did not return a model; nothing to fine-tune.")


if __name__ == "__main__":
    main()

Sequential Transfer Learning

How to transfer knowledge from one dataset to another sequentially, using different classes/tasks at each stage.

 # -*- coding: utf-8 -*-
"""
Tutorial 07: Sequential Transfer Learning
-----------------------------------------
Train on one dataset and carry the result over to another by calling
`run_sequential_transfer_learning` once.
"""

from pathlib import Path
from fishy.experiments.transfer import run_sequential_transfer_learning

# Parent of the directory that contains this script (not referenced below).
PROJECT_ROOT = Path(__file__).resolve().parent.parent


def main():
    """Transfer from the "part" dataset to "species" and print the last
    recorded fine-tuning metric for the target dataset."""
    print("--- Tutorial 07: Sequential Transfer Learning ---")

    # Source stage(s) first, then the dataset we ultimately care about.
    # Upstream example labels: 'part' ~ skin vs fillet, 'species' ~ hoki vs
    # mackerel.
    source_stages = ["part"]
    target = "species"

    # Upstream describes the call below as automating: load source data,
    # train, swap the classification head, load target data, fine-tune.
    print("Starting Transfer Learning: [part] -> [species]")

    transfer_options = {
        "model_name": "transformer",
        "transfer_datasets": source_stages,
        "target_dataset": target,
        "num_epochs_transfer": 5,  # kept short for the example
        "num_epochs_finetune": 5,
        "batch_size": 32,
        "wandb_log": False,
    }
    _model, history = run_sequential_transfer_learning(**transfer_options)

    print("\nTransfer Learning complete.")

    # Last entry of the target dataset's fine-tuning metric series.
    final_metric = history["finetune"][target]["val_balanced_acc"][-1]
    print(f"Final accuracy on target dataset: {final_metric:.2f}%")


if __name__ == "__main__":
    main()

Probabilistic Inference

Using Bayesian models like Gaussian Processes to get predictions along with uncertainty estimates.

 # -*- coding: utf-8 -*-
"""
Tutorial 08: Probabilistic Inference and Uncertainty
----------------------------------------------------
Run a Gaussian Process model through the experiment runner, then fit one
by hand and print per-sample predictions next to their uncertainty.
"""

from fishy._core.config import TrainingConfig
from fishy.experiments.classic_training import run_sklearn_experiment
from fishy.data.module import create_data_module
from sklearn.model_selection import train_test_split
from pathlib import Path
import numpy as np

# Parent of the directory that contains this script (not referenced below).
PROJECT_ROOT = Path(__file__).resolve().parent.parent


def _cross_validated_run(model_key, dataset_key):
    """Run the experiment helper for one model/dataset and print its score."""
    # Two folds only, to keep the example quick.
    cfg = TrainingConfig(
        model=model_key,
        dataset=dataset_key,
        k_folds=2,
    )

    print(f"Training {cfg.model} with uncertainty estimation...")
    stats = run_sklearn_experiment(cfg, model_key, dataset_key)

    print(f"\nGP Mean Accuracy: {stats['val_balanced_accuracy']:.4f}")


def _inspect_uncertainty(dataset_key, n_train=50, n_show=5):
    """Fit a GP on a stratified subset and print predictions with uncertainty."""
    data_module = create_data_module(dataset_key)
    data_module.setup()
    features, targets = data_module.get_numpy_data(labels_as_indices=True)

    # Stratified, seeded split so the small training subset covers the classes.
    train_x, test_x, train_y, _test_y = train_test_split(
        features, targets, train_size=n_train, stratify=targets, random_state=42
    )

    # Imported here, as in the upstream script, rather than at module level.
    from fishy.models.probabilistic.gp import GP

    classifier = GP()
    classifier.fit(train_x, train_y)

    # Upstream describes the uncertainty as 1.0 - max_prob.
    probe = test_x[:n_show]
    uncertainty = classifier.get_uncertainty(probe)
    predictions = classifier.predict(probe)

    print("\nPredictions with Uncertainty (Test Subset):")
    for idx, (pred, unc) in enumerate(zip(predictions, uncertainty)):
        print(f"  Sample {idx}: Pred={pred}, Uncertainty={unc:.4f}")


def main():
    """Run the packaged GP experiment, then the manual uncertainty check."""
    print("--- Tutorial 08: Probabilistic Inference ---")

    # Parts 1-2: configure and run the "gp" model on "species".
    _cross_validated_run("gp", "species")

    # Part 3: query a freshly fitted GP for uncertainty directly.
    _inspect_uncertainty("species")


if __name__ == "__main__":
    main()

Outputs and Visualization

Where to find experiment results and how to interpret generated artifacts like logs, metrics, and figures.

 # -*- coding: utf-8 -*-
"""
Tutorial 09: Outputs and Visualization
--------------------------------------
Create a `RunContext` and print the directories it exposes for logs,
results, figures, and checkpoints.
"""

from fishy._core.utils import RunContext
from pathlib import Path


def _print_artifact_dirs(ctx):
    """Print the four artifact directories exposed by a RunContext."""
    # Logs - upstream notes mention {run_dir}/logs/experiment.log.
    print(f"  Logs:       {ctx.log_dir}")

    # Metrics - upstream notes mention {run_dir}/results/metrics.json.
    print(f"  Results:    {ctx.result_dir}")

    # Figures - upstream notes mention training curves when figures are on.
    print(f"  Figures:    {ctx.figure_dir}")

    # Checkpoints - upstream notes mention best model weights.
    print(f"  Checkpoints: {ctx.checkpoint_dir}")


def main():
    """Build a RunContext for species/deep/transformer and list its paths."""
    print("--- Tutorial 09: Outputs and Visualization ---")

    # Upstream describes the run directory layout as
    # outputs/{dataset}/{method}/{model}_{timestamp}/.
    run_ctx = RunContext(dataset="species", method="deep", model_name="transformer")

    print(f"\nRun directory created at: {run_ctx.run_dir}")
    _print_artifact_dirs(run_ctx)

    print("\nTo generate these automatically during a run, ensure your config has:")
    for hint in ("  config.benchmark = True", "  config.figures = True"):
        print(hint)


if __name__ == "__main__":
    main()