#ifndef _KWS_H
#define _KWS_H

#include "feature_pipeline.h"
#include "kpu.h"
#include "ai_utils.h"
#include "feature_pipeline.h"

/**
 * @class KWS
 * @brief Keyword Spotting (KWS) system for real-time voice wake-up.
 *
 * This class encapsulates the process of real-time keyword spotting using
 * microphone input. It performs feature extraction, inference, and
 * post-processing to detect wake words in streaming audio.
 *
 * The intended call order per audio chunk is pre_process() -> inference()
 * -> post_process().
 *
 * @note The object holds raw pointers to helper objects (feature pipeline,
 *       KPU, AI2D). Their allocation and release happen in the out-of-line
 *       constructor/destructor (not visible in this header). Because a
 *       member-wise copy would alias those pointers, the class is
 *       non-copyable.
 */
class KWS
{
public:
    /**
     * @brief Construct a new KWS object.
     *
     * @param kmodel_file  Path to the KModel file for inference.
     * @param spot_thresh  Confidence threshold for wake word detection.
     * @param debug_mode   Debug mode flag (0 = off, 1 = on).
     */
    KWS(std::string kmodel_file, float spot_thresh, int debug_mode);

    /**
     * @brief Destroy the KWS object and release resources.
     */
    ~KWS();

    /**
     * @brief Copying is disabled.
     *
     * The class stores raw pointers alongside a user-provided destructor; an
     * implicit member-wise copy would leave two objects referring to the same
     * helper instances. Declaring the copy operations as deleted also
     * suppresses the implicit move operations, so instances can be neither
     * copied nor moved.
     */
    KWS(const KWS &) = delete;
    KWS &operator=(const KWS &) = delete;

    /**
     * @brief Preprocesses the input audio waveform to extract features.
     *
     * This function converts raw waveform data into feature representations
     * (e.g., log-Mel filterbanks) suitable for model inference.
     *
     * @param wav Input audio waveform as a vector of floating-point samples.
     *            Taken by non-const reference; whether it is modified depends
     *            on the implementation (not visible in this header).
     */
    void pre_process(std::vector<float> &wav);

    /**
     * @brief Runs the keyword spotting model inference.
     *
     * This function feeds the extracted features into the neural network
     * and performs inference to obtain keyword probabilities.
     */
    void inference();

    /**
     * @brief Postprocesses model output and determines detection result.
     *
     * This function applies thresholding and logic to decide whether
     * a wake word has been detected.
     *
     * @return int Detected keyword index, or -1 if no keyword is detected.
     */
    int post_process();

private:
    // Model geometry. These defaults are model-specific and must match the
    // KModel that is loaded.
    int num_bin = 40;                ///< Dimension of extracted audio features.
    int chunk_size = 30;             ///< Inference chunk size (number of frames per inference).
    int hidden_dim = 256;            ///< Hidden layer dimension in the model.
    int cache_dim = 105;             ///< Cache size (last dimension of in_cache tensor), model-specific.
    int num_keyword = 2;             ///< Number of keywords (including "Deactivated").
    float spot_thresh;               ///< Confidence threshold for keyword activation (no in-class default; expected to be set by the constructor).
    int debug_mode_;                 ///< Debug flag (no in-class default; expected to be set by the constructor).

    runtime_tensor model_input_tensor_wav;    ///< Input tensor for audio feature data.
    runtime_tensor model_input_tensor_cache;  ///< Input tensor for cached model state.

    std::vector<std::vector<float>> cache;    ///< Recurrent cache for streaming inference.

    wenet::FeaturePipelineConfig *feature_config = nullptr; ///< Feature extraction configuration.
    wenet::FeaturePipeline *feature_pipeline = nullptr;     ///< Feature extraction pipeline.

    KPU *kpu = nullptr;               ///< Pointer to KPU (Kendryte Processing Unit) instance.
    AI2D *ai2d_yolo = nullptr;        ///< Pointer to AI2D preprocessor instance (name kept for compatibility with the implementation file).
};

#endif // _KWS_H
