#include <errno.h>
#include <getopt.h>
#include <math.h>
#include <pthread.h>
#include <semaphore.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <time.h>
#include <unistd.h>

#include <fstream>
#include <iostream>
#include <memory>
#include <queue>
#include <vector>

#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <sched.h>


using namespace std;

#include "common.h"
#include "config.h"
#include "parser.h"
#include "rand.h"

// This include file is generated from the list of modules
// and contains the definitions of all the modules.

#include "modules.h"

#ifdef TEMPLATE_TEST
#include "main.h"
#endif

// This macro is expanded into types defining individual instances.
RPG_EXPAND_DEFINE

using namespace rpg;

/* NOTE(review): not referenced in the visible part of this file; the unit is
 * presumably microseconds -- confirm against the users of this constant. */
#define DEFAULT_ACTIVE_WAIT_TIME 25000
/** Convert a value in nanoseconds to microseconds (truncating for integer arguments) */
#define NSEC2USEC(x) ((x) / 1000)
/** Convert a value in microseconds to nanoseconds */
#define USEC2NSEC(x) ((x) * 1000)

/** controls the teardown phase where measurements are discarded
 *
 * Set to true by the worker_manager (on application timeout and in terminate),
 * polled by the workers in their main loop.
 */
volatile bool rpg_terminate = false;

/** controls whether architectural modules are also measured in isolation
 *
 * changes to this variable have to be reflected in generator's  lang_cpp::generateMeasure
 */
static bool measure_isolated_architectures = false;

/** Mutex protecting the client_times queue */
static pthread_mutex_t client_lock;
/** Priority queue for the timestamps when clients finish their think time;
 *  the earliest timestamp is on top (min-heap via greater<>)
 */
static priority_queue<measure_time_t, vector<measure_time_t>, greater<measure_time_t> > client_times;

/** Semaphore to signal to the main thread that enough samples were collected */
static sem_t cycle_finish;

/** Read the current value of the CLOCK_REALTIME clock through getClockTime */
static inline measure_time_t getRealTime (void)
{
	const measure_time_t wall_clock_now = getClockTime (CLOCK_REALTIME);
	return wall_clock_now;
}

/* Forward declarations: worker and worker_manager refer to each other, so both
 * names have to be known before either class body is seen. */
class worker_manager;
class worker;
/* NOTE(review): no definition of the two free functions below is visible in this
 * file; the thread entry point actually used is worker::start_thread and the
 * measured work is done by the member worker::work -- confirm whether these
 * declarations are still needed. */
void* do_work(void* worker_);
void work(worker* worker, module_ptr m);

/** Structure for holding times measured on the root module */
typedef struct work_times {
	/** monotonic timestamp when the work starts */
	measure_time_t monotonic_start;
	/** monotonic timestamp when work is finished */
	measure_time_t monotonic_end;
	/** monotonic time of client arrival */
	measure_time_t client_arrival;

	work_times (measure_time_t _monotonic_start, measure_time_t _monotonic_end, measure_time_t _client_arrival)
		: monotonic_start (_monotonic_start), monotonic_end (_monotonic_end), client_arrival (_client_arrival) { };

} work_times;

class worker {
public:

	worker (int _id, int _cpu_num)
		: id (_id), cpu_num (_cpu_num) {
	}

	void start (worker_manager * _owner) {
		owner = _owner;
		if (pthread_create (&working_thread, NULL, &start_thread, this) != 0) {
			perror ("pthread_create");
			exit (EXIT_FAILURE);
		}

		if (cpu_num >= 0) {
			cpu_set_t cpuSet;
			CPU_ZERO (&cpuSet);
			CPU_SET (cpu_num, &cpuSet);
			if (pthread_setaffinity_np (working_thread, sizeof (cpu_set_t), &cpuSet) != 0) {
				perror ("pthread_setaffinity_np");
				exit (EXIT_FAILURE);
			}
		}
	}

	void join (void) {
		pthread_join (working_thread, NULL);
	}

	bool timedjoin (const struct timespec & timeout) {
		int s = pthread_timedjoin_np (working_thread, NULL, &timeout);
		if (s != 0) {
			error ("Timeout waiting for worker thread!");
			return false;
		} else {
			return true;
		}
	}

	/*** Print request processing times collected during work */
	void print_times (ostream& o) {
		for (std::vector<work_times> :: iterator iter = processing_times.begin (); iter != processing_times.end (); iter++) {
			output (o, GROUP_APP, OUTPUT_CONTEXT_SHARED, OUTPUT_MONOTONIC_MEASURE, iter->monotonic_end - iter->monotonic_start);
			output (o, GROUP_APP, OUTPUT_CONTEXT_SHARED, OUTPUT_MONOTONIC_MEASURE "_end", iter->monotonic_end);
			output (o, GROUP_APP, OUTPUT_CONTEXT_SHARED, OUTPUT_MONOTONIC_MEASURE "_client", iter->monotonic_end - iter->client_arrival);
		}
	}

private:

	/** Check if client_time is due and wait if necessary */
	void wait_for_client_request () {
		const measure_time_t current_time = getMonotonicTime ();

		/* For clients that already expired, we can immediately serve them */
		if (current_client_arrival < current_time) {
			return;
		} else {
			/* For clients not yet expired, we just wait for their due time.
			 * Note that it is not a problem if another client appears with
			 * a shorter due time than this one, because this only happens
			 * when the worker that finished the client and generated its new
			 * time, so it is also free to process it.
			 */
			struct timespec target_time;
			measure_time_to_timespec (current_client_arrival, target_time);
			if (clock_nanosleep (CLOCK_MONOTONIC, TIMER_ABSTIME, &target_time, NULL) != 0) {
				perror ("clock_nanosleep");
				exit (EXIT_FAILURE);
			}
		}
	}

	/** Vector for storing timestamps of starting and finishing a client request processing */
	std::vector<work_times> processing_times;

	/** The owning worker_manager */
	worker_manager * owner;

	/** Identifier of the working thread */
	pthread_t working_thread;

	/** Arrival time of the current client */
	measure_time_t current_client_arrival;

	/** ID of the worker */
	int id;

	/** Number of the cpu for affinity setting */
	int cpu_num;

	bool should_finish (void) {
		return rpg_terminate;
	}

	/* Thunk to allow pthread_create to start an worker instance's method */
	static void *start_thread (void *obj) {
		return reinterpret_cast<worker *>(obj)->run ();
	}

	/** The worker's main function
	 *
	 * Quite long, definition thus below
	 */
	void *run ();

	/** Perform the measured work on a given root module
	 *
	 * Definition below worker_manager due to otherwise incomplete type
	 */
	void work (module_ptr m);
};

class worker_manager {
private:
	worker ** workers;
	unsigned int thread_count;

	unsigned int clients;
	unsigned int client_time_ex;

	/** Measuring of cycles of work routine */
	volatile unsigned int cycle_count;

	unsigned int min_cycle_count;

	/** Internal method, should be synchronized on client_lock */
	inline void _generate_client_request (void) {
		const measure_time_t client_time = (measure_time_t) USEC2NSEC (rand_exp ((long double) client_time_ex));
		measure_time_t time = getMonotonicTime () + client_time;
		client_times.push(time);
	}

	/** Internal method, should be synchronized on client_lock */
	measure_time_t _get_client_request (void) {

		if (!client_times.empty()) {
			/* take the client that's on top of the queue and return its time */
			measure_time_t client_time = client_times.top();
			client_times.pop();
			return client_time;
		} else {
			// if the queue is empty, it means something's wrong
			cerr << "no clients" << endl;
			exit(EXIT_FAILURE);
		}
	}

public:

	/** Timestamp (in real time) of the moment where isolated measurements end
	 * and client processing starts
	 */
	measure_time_t client_start_time;

	/**
	 * One worker thread calls this to generate the first bunch of clients
	 * and denote start of non-isolated work.
	 */
	void start_client_requests(void) {
		pthread_mutex_lock(&client_lock);

		for (unsigned int i = 0; i < clients; i++) {
			_generate_client_request();
		}

		pthread_mutex_unlock(&client_lock);

		/* get timestamp for main thread's accounting */
		client_start_time = getRealTime();
	}

	/** Worker threads calls this to finish serving one client and get a new one */
	measure_time_t get_next_client_request(void) {
		pthread_mutex_lock(&client_lock);

		/* Generate new time for the client that was just served */
		_generate_client_request();

		/* Get the client with earliest due time */
		const measure_time_t client_time = _get_client_request();

		pthread_mutex_unlock(&client_lock);

		return client_time;
	}

	/** Worker thread calls this to get its first client */
	measure_time_t get_first_client_request(void) {
		pthread_mutex_lock(&client_lock);

		/* Get the client with earliest due time */
		const measure_time_t client_time = _get_client_request();

		pthread_mutex_unlock(&client_lock);

		return client_time;
	}

	void start (unsigned int thread_count_, unsigned int ctime, unsigned int ccount, unsigned int _min_cycle_count, bool bind_threads) {
		rpg_terminate = false;
		cycle_count = 0;
		min_cycle_count = _min_cycle_count;

		clients = ccount;
		client_time_ex = ctime;

		/* the performance model needs to know this */
		output (cout, GROUP_APP, OUTPUT_CONTEXT_CONFIG, ITEM_CLIENT_COUNT, ccount);
		output (cout, GROUP_APP, OUTPUT_CONTEXT_CONFIG, ITEM_THREAD_COUNT, thread_count_);
		output (cout, GROUP_APP, OUTPUT_CONTEXT_CONFIG, ITEM_CLIENT_WAIT_TIME_EX, client_time_ex);

		/* create threads and start them */
		thread_count = thread_count_;
		workers = new worker * [thread_count];

		/* determine which processor cores are available and store them in an array */
		int num_avail_cpus = 0;
		int * avail_cpus = NULL;

		if (bind_threads) {
			cpu_set_t cpuSet;
			CPU_ZERO (&cpuSet);
			if (sched_getaffinity (0, sizeof(cpu_set_t), &cpuSet) != 0) {
				/* not fatal, continue */
				perror ("sched_getaffinity");
				exit (EXIT_FAILURE);
			}

			num_avail_cpus = CPU_COUNT (&cpuSet);
			avail_cpus = new int [num_avail_cpus];

			/* go through all possible cpu id's and store those that are available */
			int avail = 0;
			for (int cpu = 0; cpu < CPU_SETSIZE; cpu++) {
				if (CPU_ISSET (cpu, &cpuSet)) {
					avail_cpus [avail] = cpu;
					avail++;
					/* shortcut the end of the search when we found all available already */
					if (avail == num_avail_cpus) {
						break;
					}
				}
			}
		}

		for (unsigned int i = 0; i < thread_count; i++) {
			if (bind_threads) {
				/* cycle through the available cores */
				workers[i] = new worker (i, avail_cpus [i % num_avail_cpus]);
			} else {
				workers[i] = new worker (i, -1);
			}
			workers[i]->start(this);
		}

		delete[] avail_cpus;
	}

	/** Wait for the workers to finish, or timeout on max_time. */
	void wait_for_workers (unsigned int min_time, unsigned int max_time) {
		/*
		 * Now we need to determine when to end the application.
		 * First we get the current time as the start time.
		 * Note that it has to be real time because sem_timedwait
		 * cannot use any other (e.g. monotonic) clock!
		 */
		struct timespec start_time;
		if (clock_gettime (CLOCK_REALTIME, &start_time) != 0) {
			perror ("Cannot measure time");
			exit (EXIT_FAILURE);
		}

		/*
		 * First we wait until there are enough cycles collected.
		 * We have a semaphore that worker thread adding the last needed cycle raises.
		 * If there is a max_time limit, limit the waiting on the semaphore.
		 */
		if (max_time != 0) {
			// create the absolute timestamp of the app timeout
			struct timespec end_time;
			// max time is in seconds
			end_time.tv_sec = start_time.tv_sec + max_time;
			end_time.tv_nsec = start_time.tv_nsec;

			// wait for either the semaphore or timeout
			if (sem_timedwait (&cycle_finish, &end_time) != 0) {
				if (errno == ETIMEDOUT) {
					error ("Application timeout when waiting for enough cycles");
					rpg_terminate = true;
				} else {
					perror ("sem_timedwait");
					exit (EXIT_FAILURE);
				}
			}
		} else {
			if (sem_wait (&cycle_finish) != 0) {
				perror ("sem_wait");
				exit (EXIT_FAILURE);
			}
		}

		/* If there wasn't a timeout already */
		if (!rpg_terminate) {
			/*
			 * We collected enough samples, now we should wait to achieve minimal time
			 * which should be counted from the manager's client_start_time (excluding initialization)
			 * But the max_time is counted from the start_time timestamp, so we need to see which
			 * deadline is earlier
			 */
			struct timespec end_time;
			/* min_time and max_time is in seconds */
			if (max_time != 0 &&
					timespec_to_measure_time (start_time) + max_time * NANOSECONDS_IN_SEC
					< client_start_time + min_time * NANOSECONDS_IN_SEC) {
				// max_time is earlier
				end_time = start_time;
				end_time.tv_sec += max_time;
				error ("Application timeout is earlier than min_time");
			} else {
				// base end_time from client_start_time
				measure_time_to_timespec (client_start_time, end_time);
				end_time.tv_sec += min_time;
			}

			/* Is there even a reason to wait? If yes, wait... */
			if (timespec_to_measure_time (end_time) > getRealTime ()) {
				if (clock_nanosleep (CLOCK_REALTIME, TIMER_ABSTIME, &end_time, NULL) != 0) {
					perror ("clock_nanosleep");
					exit (EXIT_FAILURE);
				}
			}
		}

		/* Either way, time to end the workers */
		/* denote that the shared mode has stopped, to notify e.g. pfmon measurements*/
		ofstream shared ("./triggers/shared.stop", ios::out);
		if (!shared.fail()) {
			shared.close();
		}

	}

	bool terminate(bool wait) {
		rpg_terminate = true;

        if (wait) {

            struct timespec ts;

			if (clock_gettime (CLOCK_REALTIME, &ts) == -1) {
			 	error ("clock_gettime failed");
			 	exit (EXIT_FAILURE);
			}

			/* TODO: configurable variable? */
			ts.tv_sec += 120;

			for (unsigned int i = 0; i < thread_count; i++) {
				if (!workers[i]->timedjoin(ts)) {
					// TODO: use thread_cancel?
					return false;
				}
			}

        } else {

			for (unsigned int i = 0; i < thread_count; i++) {
				workers[i]->join();
			}

        }

        return true;
	}

	void print_times (ostream& o) {
		for (unsigned int i = 0; i < thread_count; i++) {
			workers[i]->print_times(o);
		}
	}

	void clean() {
		delete[] workers;
	}

	/** Update cycle count and notify the main thread when requested minimum was achieved */
	void add_cycle () {
		if (__sync_add_and_fetch (&cycle_count, 1) == min_cycle_count) {
			sem_post (&cycle_finish);
		}
	}
};

void *worker::run ()
{
	processing_times.reserve (max_measured_values);

	// Declare and initialize individual module instances. The initialization is made under the
	// exclusive mutex to cover cases where legacy modules access static variables not protected
	// by locks.
	lock_exclusive_mutex ();
	RPG_EXPAND_DECLARE
	RPG_EXPAND_INIT
	unlock_exclusive_mutex ();

	// Before the isolated measurement, print implementation-specific module configuration for
	// the module transformation purposes. Only one thread prints this as module instances
	// in each thread are the same.
	if (wait_barrier() == PTHREAD_BARRIER_SERIAL_THREAD) {
		RPG_EXPAND_PRINTCONFIG
	}

	// The chosen thread should finish printing before isolated measurements.
	wait_barrier ();

	// Perform isolated measurements. The code that drives the isolated measurements
	// is, somewhat unusually, inside the modules, because expanding it here would
	// make the generator a bit more complex.
	//
	// The isolation is achieved by the exclusive mutex.
	lock_exclusive_mutex ();
	RPG_EXPAND_MEASURE
	unlock_exclusive_mutex ();

	// Wait for the isolated measurements to be finished.
	wait_barrier ();

	// Reset the collected measurements before the shared work commences.
	RPG_EXPAND_CLEAR

	// Wait for the reset to be finished. Afterwards, one of
	// the threads will generate the initial client requests in the queue
	// and start measuring the client time.
	if (wait_barrier () == PTHREAD_BARRIER_SERIAL_THREAD)
	{
		// denote that the shared mode has started, to trigger e.g. pfmon measurements
		ofstream shared ("./triggers/shared.start", ios::out);
		if (!shared.fail()) {
			shared.close();
		}

		owner->start_client_requests ();
	}

	// We do one more barrier call to make sure the threads will ask for
	// client requests only after they are really generated. This wait
	// should not be long and therefore should not influence the
	// already running client time too much.
	//
	// Anyway. Either we start measuring the client time before the barrier,
	// including some wait time, or we start measuring the client time after
	// the barrier, potentially excluding some processing time ...
	wait_barrier ();

	// Get the first client.
	current_client_arrival = owner->get_first_client_request ();

	while (!should_finish ())
	{
		// Wait for the client if needed.
		wait_for_client_request ();
		try
		{
			RPG_EXPAND_WORK
		}
		catch (rpg_exception_t e)
		{
			exit (EXIT_FAILURE);
		}

		// Generate and get the next client.
		current_client_arrival = owner->get_next_client_request ();
	}

	// We know that we should terminate, but not all threads get the information
	// at the same time. Wait for them, the measurements are no longer collected
	// so it does not matter that we do not generate the workload.
	wait_barrier ();

	// And now just print the results of the measurements.
	RPG_EXPAND_PRINTTIMES

	// Wait for all threads to finish printing
	wait_barrier ();

	// Deinitialize the modules. Since module deinitialization can be potentially
	// thread unsafe, deinitialize is protected by mutex
	lock_exclusive_mutex ();

	RPG_EXPAND_DEINIT

	unlock_exclusive_mutex ();

	return NULL;
}

/**
 * Run one measured request on the given root module and account for it.
 *
 * Absolute monotonic timestamps are taken around the call here, in addition
 * to whatever the module records itself, so that the timing perceived by the
 * client can be calculated later.
 */
void worker::work (module_ptr m)
{
	const measure_time_t started_at = getMonotonicTime ();
	int session_state = RETURN_OK;
	m->measured_work (session_state);
	const measure_time_t finished_at = getMonotonicTime ();

	// Samples are dropped once termination was requested (some threads might
	// have finished already) and once the storage limit was reached.
	const bool still_measuring = !should_finish ();
	if (still_measuring && processing_times.size () < max_measured_values)
	{
		const work_times sample (started_at, finished_at, current_client_arrival);
		processing_times.push_back (sample);
	}

	// Count the cycle; the manager notifies the main thread when the requested minimum is reached.
	owner->add_cycle ();
}

/* Long command line options accepted by getopt_long in main; each one maps to
 * the short option character given in the last field. The all-zero entry
 * terminates the table. */
static struct option long_opts[] = {
	{"help",					no_argument,	NULL, 'h'},
	{"measure-architectures",	no_argument,	NULL, 'm'},
	{"bind-threads",			no_argument,	NULL, 'b'},
	{0, 0, 0, 0}
};

/* Help texts printed by print_usage, which walks this table in step with
 * long_opts: entry i describes option i, so both tables have to be kept in
 * the same order and of the same length. */
static const char * opts_help [] = {
	"show this help",
	"measure also architectural modules in isolation",
	"bind processing threads to processor cores",
	NULL
};


/** Write the list of command line options with their help texts to the standard error output */
static void print_usage () {
	cerr << "Available command line options: " << endl;

	/* long_opts ends with an entry whose name is NULL; opts_help is indexed in step with it */
	for (int index = 0; long_opts[index].name != NULL; index++) {
		const struct option & entry = long_opts[index];
		const char short_name = (char) entry.val;

		cerr << "--" << entry.name << " (-" << short_name << ")" << " - " << opts_help[index] << endl;
	}
}

/**
 * Application entry point.
 *
 * Parses the command line, loads the configuration file, starts the worker
 * threads, waits until enough work was measured (or the time limit expired),
 * terminates the workers and prints the collected times.
 *
 * @return EXIT_SUCCESS after a complete run; -1 when the configuration file
 *         cannot be opened; EXIT_FAILURE on other errors
 */
int main(int argc, char* argv[]) {

	bool bind_threads = false;

	while (true) {
		int opt_index = 0;
		const int c = getopt_long(argc, argv, "hmb", long_opts, &opt_index);

		if (c == -1) {
			break;
		}

		switch (c) {
			case 'h':
				print_usage();
				exit (EXIT_FAILURE);
			case 'm':
				measure_isolated_architectures = true;
				break;
			case 'b':
				bind_threads = true;
				break;
			case '?':
				/* argv[optind] is the NEXT argument (or NULL at the end of the command
				 * line), not the rejected one. For a short option the offending
				 * character is in optopt; for an unknown long option optopt is zero
				 * and optind was already moved past the offending argument. */
				if (optopt != 0) {
					cerr << "unknown option: -" << (char) optopt << endl;
				} else if (optind >= 1 && optind <= argc) {
					cerr << "unknown option: " << argv[optind - 1] << endl;
				}
				print_usage ();
				exit(EXIT_FAILURE);
				break;
			default:
				cerr << "unexpected getopt_long() result: " << c << endl;
				print_usage ();
				exit(EXIT_FAILURE);
		}
	}

	/* if we have too many modules and allocate too much memory, we want to be killed first */
	ofstream oom("/proc/self/oom_adj", ios::out);
	if (!oom.fail()) {
		oom << "15" << endl;
		oom.close();
	}

	/* determine number of available cores and output it for the performance model */
	cpu_set_t cpuSet;
	CPU_ZERO (&cpuSet);
	if (sched_getaffinity (0, sizeof(cpu_set_t), &cpuSet) == 0) {
		output (cout, GROUP_APP, OUTPUT_CONTEXT_CONFIG, ITEM_CORES_COUNT, CPU_COUNT (&cpuSet));
	} else {
		/* not fatal, continue */
		perror ("sched_getaffinity");
	}

	/* first, parse default configuration file, if it exists */
	FILE* input = fopen(OUTPUT_CONFIG_FILE, "r");
	if (input == NULL) {
		error("Cannot read the configuration file.");
		return -1;
	}

	rpg_context_t context(sprovider, cprovider);

	/* try to parse configuration file */
	config_set_in(input);
	if (config_parse(context) != 0 || errors != 0) {
		error("Error parsing the configuration file.");
		fclose(input);
		return EXIT_FAILURE;
	}

	/* parse done! */
	fclose(input);

	/* free lex structures */
	config_lex_destroy();

	unsigned int mincycles;
	unsigned int min_time;
	unsigned int threads;
	unsigned int ctime;
	unsigned int ccount;
	unsigned int max_time;

	try {
		rpg_data_source_t mc(sprovider.createSource(GROUP_APP,				ITEM_MIN_CYCLES));
		rpg_data_source_t mt(sprovider.createSource(GROUP_APP,				ITEM_MIN_TIME));
		rpg_data_source_t th(sprovider.createSource(GROUP_APP,				ITEM_THREAD_COUNT));
		rpg_data_source_t client_time(sprovider.createSource(GROUP_APP,		ITEM_CLIENT_WAIT_TIME_EX));
		rpg_data_source_t client_count(sprovider.createSource(GROUP_APP,	ITEM_CLIENT_COUNT));
		rpg_data_source_t tlimit( sprovider.createSource(GROUP_APP, 		ITEM_TIME_LIMIT));
		rpg_data_source_t m_values( sprovider.createSource(GROUP_APP, 		ITEM_VALUES_STORAGE_MAX_SIZE));
		rpg_data_source_t seed_gen (sprovider.createSource(GROUP_CORE,		ITEM_RANDOM_SEED));

		mincycles 	        = mc->getIntValue ();
		min_time 	        = mt->getIntValue ();
		threads  	        = th->getIntValue ();
		ctime		        = client_time->getIntValue ();
		ccount		        = client_count->getIntValue ();
		max_measured_values = m_values->getIntValue ();
		max_time            = tlimit->getIntValue ();
		rand_init(seed_gen->getIntValue ());
	} catch (const rpg_exception_t &) {
		error("Cannot load default config!");
		return EXIT_FAILURE;
	}

	/* pthread_mutex_init reports failures through its return value and leaves
	 * errno alone, so the code is copied to errno for perror */
	const int mutex_rc = pthread_mutex_init(&client_lock, NULL);
	if (mutex_rc != 0) {
		errno = mutex_rc;
		perror("Cannot initialize mutex");
		exit(EXIT_FAILURE);
	}

	if (sem_init (&cycle_finish, 0, 0) != 0) {
		perror("Cannot initialize semaphore");
		exit(EXIT_FAILURE);
	}

	// having more threads than clients has no benefit
	// and would fail in worker_manager::_get_client_request()
	if (ccount < threads) {
		cerr << "Warning: there are fewer clients than threads, reducing number of threads to " << ccount << endl;
		threads = ccount;
	}

	modules_init (threads);
	worker_manager manager;

	/* seed the C library generator from the current time of day */
	struct timeval random_start;
	gettimeofday (&random_start, NULL);
	srand (random_start.tv_sec + random_start.tv_usec);

	// start the worker threads
	manager.start (threads, ctime, ccount, mincycles, bind_threads);

	// wait for the worker threads to generate enough cycles and run time, or timeout waiting for that
	manager.wait_for_workers (min_time, max_time);

	// try to finish gracefully
	bool graceful = manager.terminate (true);
	if (!graceful) {
		error ("Threads did not join within timeout, doing abrupt exit.");
		exit (EXIT_FAILURE);
	}

	manager.print_times (cout);
	manager.clean ();

	modules_destroy ();
	pthread_mutex_destroy (&client_lock);
	sem_destroy (&cycle_finish);

	return EXIT_SUCCESS;
}
