#pragma once

// EXAMPLE_API decorates every symbol that belongs to the library's public
// interface:
//   - WINDOWS and EXAMPLE_EXPORT defined : expands to __declspec(dllexport)
//   - only WINDOWS defined               : expands to __declspec(dllimport)
//   - WINDOWS not defined                : expands to nothing
// NOTE(review): neither WINDOWS nor EXAMPLE_EXPORT is defined in this header;
// presumably the build system supplies them - confirm.
#if defined(WINDOWS) && defined(EXAMPLE_EXPORT)
	#define EXAMPLE_API __declspec(dllexport)
#elif defined(WINDOWS)
	#define EXAMPLE_API __declspec(dllimport)
#else
	#define EXAMPLE_API
#endif

// The functions below must be declared 'extern "C"' so that they are exported
// from the DLL without C++ name mangling and can be imported by MQL5 code.
// Wrapping them in a 'namespace', or omitting 'extern "C"', does not work:
// MQL5 code will not be able to find such functions.
extern "C" {
	// NOTE: [[nodiscard]] is deliberately written before EXAMPLE_API. The start
	// of a declaration is the standard position for an attribute that applies
	// to the declared function; placing it after the __declspec(...) that
	// EXAMPLE_API expands to on Windows is not portable across compilers.

	/**
	 * Add two specified numbers together.
	 *
	 * @param a first operand
	 * @param b second operand
	 * @return result of the addition
	 */
	[[nodiscard]] EXAMPLE_API int add(int a, int b) noexcept;

	/**
	 * Subtract two specified numbers.
	 *
	 * @param a first operand
	 * @param b second operand
	 * @return result of the subtraction
	 *         (presumably a - b; operand order is not visible here - confirm)
	 */
	[[nodiscard]] EXAMPLE_API int sub(int a, int b) noexcept;

	/**
	 * Get the total hardware concurrency.
	 *
	 * @return number of concurrent hardware threads as reported by the
	 *         implementation
	 */
	[[nodiscard]] EXAMPLE_API int num_hardware_concurrency() noexcept;

	/**
	 * Sum the first `num_elem` elements of the specified array.
	 * The computation is done linearly in a single thread.
	 *
	 * Unlike the functions above, this one is not declared noexcept.
	 *
	 * @param arr      array of elements to sum
	 * @param num_elem number of elements of `arr` to sum
	 * @return sum of the elements
	 */
	[[nodiscard]] EXAMPLE_API int single_threaded_sum(const int arr[], int num_elem);

	/**
	 * Sum the first `num_elem` elements of the specified array.
	 * The computation is done across multiple threads.
	 *
	 * This version takes full care to avoid false sharing by copying the
	 * working chunk of data for each thread and feeding it to that thread.
	 * Although the shared data is only read, never written, sharing it still
	 * carries a performance hit, albeit a much less significant one. In short,
	 * this may be more false-sharing avoidance than necessary. It is mostly
	 * suitable for processors based on the MSI cache coherence protocol.
	 *
	 * The declaration is currently disabled (commented out).
	 */
	//[[nodiscard]] EXAMPLE_API int multi_threaded_sum_v1(const int* const& arr, int num_elem);

	/**
	 * Sum the first `num_elem` elements of the specified array.
	 * The computation is done across multiple threads.
	 *
	 * This version is suitable for processors based on the MESI cache
	 * coherence protocol. It does not make a copy of the input array, but
	 * instead shares it among all threads for reading. It still attempts to
	 * write both temporary and final results as few times as possible, and
	 * thus minimally affects performance.
	 *
	 * This function is not declared noexcept.
	 *
	 * @param arr      array of elements to sum
	 * @param num_elem number of elements of `arr` to sum
	 * @return sum of the elements
	 */
	[[nodiscard]] EXAMPLE_API int multi_threaded_sum_v2(const int arr[], int num_elem);
} // extern "C"
