#include <iostream>
#include <fstream>
#include <cmath>
#include <ai2d.h>
#include <opencv2/opencv.hpp>

// Location of the picture that setup() feeds into every AI2D test.
std::string image_path{"test.jpg"};
// Width and height (in pixels) that the input tensor is declared with.
size_t img_w{1024};
size_t img_h{1024};

void setup() {
  // Load input image using OpenCV
  cv::Mat input_image=cv::imread(image_path);

  // Create input tensor from image data
  dims_t input_shape{1,img_h,img_w,3};
  runtime_tensor input_tensor = host_runtime_tensor::create(
      typecode_t::dt_uint8,
      input_shape,
      { (gsl::byte *)input_image.data, input_shape[0]*input_shape[1]*input_shape[2]*input_shape[3]},
      true,
      hrt::pool_shared
  ).expect("cannot create input tensor");
  hrt::sync(input_tensor, sync_op_t::sync_write_back, true).unwrap();

  /************* Resize Test *************/
  AI2D ai2d_resize=AI2D();
  // Configure dtype: input/output are RGB packed format, uint8
  ai2d_resize.set_ai2d_dtype(ai2d_format::RGB_packed, ai2d_format::RGB_packed,
                             typecode_t::dt_uint8, typecode_t::dt_uint8);
  // Set resize interpolation method
  ai2d_resize.set_resize(ai2d_interp_method::tf_bilinear, ai2d_interp_mode::half_pixel);
  dims_t resize_input_shape{1,img_h,img_w,3};
  dims_t resize_output_shape{1,320,320,3};
  ai2d_resize.build(resize_input_shape,resize_output_shape);
  runtime_tensor ai2d_resize_tensor = host_runtime_tensor::create(typecode_t::dt_uint8, resize_output_shape, hrt::pool_shared).expect("cannot create input tensor");
  ai2d_resize.run(input_tensor,ai2d_resize_tensor);

  // Save resize result
  auto resize_input_buf = ai2d_resize_tensor.impl()->to_host().unwrap()->buffer().as_host().unwrap().map(map_access_::map_write).unwrap().buffer();
  char* resize_output_data=reinterpret_cast<char *>(resize_input_buf.data());
  cv::Mat resize_out_img(320, 320, CV_8UC3, resize_output_data);
  cv::imwrite("resize_output.jpg", resize_out_img);

  /************* Padding Test *************/
  AI2D ai2d_pad=AI2D();
  ai2d_pad.set_ai2d_dtype(ai2d_format::RGB_packed, ai2d_format::RGB_packed,
                          typecode_t::dt_uint8, typecode_t::dt_uint8);
  // Pad params: top=20, bottom=20, left=30, right=30, constant color=128,128,128
  std::vector<int> pad_param = {0, 0, 0, 0,20,20,30,30};
  ai2d_pad.set_pad(pad_param, ai2d_pad_mode::constant, {128,128,128});
  dims_t pad_input_shape{1,img_h,img_w,3};
  dims_t pad_output_shape{1,img_h+40,img_w+60,3};
  ai2d_pad.build(pad_input_shape,pad_output_shape);
  runtime_tensor ai2d_pad_tensor = host_runtime_tensor::create(typecode_t::dt_uint8, pad_output_shape, hrt::pool_shared).expect("cannot create input tensor");
  ai2d_pad.run(input_tensor,ai2d_pad_tensor);

  // Save pad result
  auto pad_input_buf = ai2d_pad_tensor.impl()->to_host().unwrap()->buffer().as_host().unwrap().map(map_access_::map_write).unwrap().buffer();
  char* pad_output_data=reinterpret_cast<char *>(pad_input_buf.data());
  cv::Mat pad_out_img(img_h+40,img_w+60, CV_8UC3, pad_output_data);
  cv::imwrite("pad_output.jpg", pad_out_img);
  
  /************* Cropping Test *************/
  AI2D ai2d_crop=AI2D();
  ai2d_crop.set_ai2d_dtype(ai2d_format::RGB_packed, ai2d_format::RGB_packed,
                           typecode_t::dt_uint8, typecode_t::dt_uint8);
  // Crop region: start=(100,100), size=(500x500)
  size_t crop_x=100;
  size_t crop_y=100;
  size_t crop_w=500;
  size_t crop_h=500;
  ai2d_crop.set_crop(crop_x,crop_y,crop_w,crop_h);
  dims_t crop_input_shape{1,img_h,img_w,3};
  dims_t crop_output_shape{1,500,500,3};
  ai2d_crop.build(crop_input_shape,crop_output_shape);
  runtime_tensor ai2d_crop_tensor = host_runtime_tensor::create(typecode_t::dt_uint8, crop_output_shape, hrt::pool_shared).expect("cannot create input tensor");
  ai2d_crop.run(input_tensor,ai2d_crop_tensor);

  // Save crop result
  auto crop_input_buf = ai2d_crop_tensor.impl()->to_host().unwrap()->buffer().as_host().unwrap().map(map_access_::map_write).unwrap().buffer();
  char* crop_output_data=reinterpret_cast<char *>(crop_input_buf.data());
  cv::Mat crop_out_img(500,500, CV_8UC3, crop_output_data);
  cv::imwrite("crop_output.jpg", crop_out_img);

  /************* Affine Transform Test *************/
  AI2D ai2d_affine=AI2D();
  ai2d_affine.set_ai2d_dtype(ai2d_format::RGB_packed, ai2d_format::RGB_packed,
                             typecode_t::dt_uint8, typecode_t::dt_uint8);
  // Affine matrix (scale 0.5 and translate by 200)
  std::vector<float> affine_matrix = {0.5,0.0,200.0,
                                      0.0,0.5,200.0};
  ai2d_affine.set_affine(ai2d_interp_method::cv2_bilinear, 0, 0, 127, 1, affine_matrix);
  dims_t affine_input_shape{1,img_h,img_w,3};
  dims_t affine_output_shape{1,img_h/2,img_w/2,3};
  ai2d_affine.build(affine_input_shape,affine_output_shape);
  runtime_tensor ai2d_affine_tensor = host_runtime_tensor::create(typecode_t::dt_uint8, affine_output_shape, hrt::pool_shared).expect("cannot create input tensor");
  ai2d_affine.run(input_tensor,ai2d_affine_tensor);

  // Save affine result
  auto affine_input_buf = ai2d_affine_tensor.impl()->to_host().unwrap()->buffer().as_host().unwrap().map(map_access_::map_write).unwrap().buffer();
  char* affine_output_data=reinterpret_cast<char *>(affine_input_buf.data());
  cv::Mat affine_out_img(img_h/2,img_w/2, CV_8UC3, affine_output_data);
  cv::imwrite("affine_output.jpg", affine_out_img);

  /************* Shift (Bit Shift) Test *************/
  // Create a synthetic 16-bit image with constant value
  cv::Mat shift_img(320, 320, CV_16UC3, cv::Scalar(120, 120, 120));
  cv::imwrite("shift_ori.jpg", shift_img);

  AI2D ai2d_shift=AI2D();
  ai2d_shift.set_ai2d_dtype(ai2d_format::RAW16, ai2d_format::RAW16,
                            typecode_t::dt_uint16, typecode_t::dt_uint16);
  // Shift right by 1 bit
  ai2d_shift.set_shift(1);
  dims_t shift_input_shape{1,320,320,3};
  dims_t shift_output_shape{1,320,320,3};
  ai2d_shift.build(shift_input_shape,shift_output_shape);

  runtime_tensor shift_input_tensor = host_runtime_tensor::create(
      typecode_t::dt_uint16,
      shift_input_shape,
      { (gsl::byte *)shift_img.data, 320*320*3*sizeof(uint16_t)},
      true,
      hrt::pool_shared
  ).expect("cannot create input tensor");
  hrt::sync(shift_input_tensor, sync_op_t::sync_write_back, true).unwrap();
  
  runtime_tensor ai2d_shift_tensor = host_runtime_tensor::create(typecode_t::dt_uint16, shift_output_shape, hrt::pool_shared).expect("cannot create input tensor");
  ai2d_shift.run(shift_input_tensor,ai2d_shift_tensor);

  // Save shift result
  auto shift_input_buf = ai2d_shift_tensor.impl()->to_host().unwrap()->buffer().as_host().unwrap().map(map_access_::map_write).unwrap().buffer();
  char* shift_output_data=reinterpret_cast<char *>(shift_input_buf.data());
  cv::Mat shift_out_img(320,320, CV_16UC3, shift_output_data);
  cv::imwrite("shift_output.jpg", shift_out_img);
}

// Arduino-style repeated entry point; this sketch has nothing to do here, so
// the body is intentionally left empty.
void loop() {}
