#include "sys/times.h"

#include <algorithm>
#include <array>
#include <chrono>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>

#include <sdsl/int_vector.hpp>
#include <sdsl/bit_vectors.hpp>
#include <sdsl/util.hpp>
#include <sdsl/rank_support.hpp>
#include <sdsl/select_support.hpp>
#include <sdsl/suffix_arrays.hpp>

extern "C" {
        #include "huffman.h"
}

// Node count taken from the command line; main() assigns it right after
// parsing argv[2]. Nothing else in this file reads it.
uint32_t totNodes;

/**
 * Opens "<path>.B2.bin.huffman" and passes it to HuffmanDecodeToBytes, which
 * is expected to produce the Huffman tree and a bit-file handle for B2.
 *
 * @param path     Base path of the compressed graph; the suffix
 *                 ".B2.bin.huffman" is appended to it.
 * @param bInFile  Out-parameter that receives the bit-file handle. Must not
 *                 be null.
 * @return The Huffman tree filled in by HuffmanDecodeToBytes; never NULL.
 *
 * The process is terminated with exit(1) when the file cannot be opened, when
 * bInFile itself is null, or when the decoder leaves either the tree or the
 * bit-file handle unset.
 */
huffman_node_t *readCompressedHuffB2(const std::string path, bit_file_t **bInFile){
    // The out-parameter is dereferenced below, so reject a null one up front.
    if (bInFile == NULL){
        std::cout<<" ht is NULL o bb es NULL\n";
        exit(1);
    }

    const std::string b2Path = path + ".B2.bin.huffman";

    FILE *inFile = fopen(b2Path.c_str(), "rb");
    if (inFile == NULL){
        std::cout<<" cannot read B2 huffman file\n";
        exit(1);
    }

    // Start from a known state so a decoder that writes nothing is detected.
    *bInFile = NULL;
    huffman_node_t *ht = NULL;

    // NOTE(review): inFile is not closed here; presumably the returned bit
    // file keeps using it -- confirm against the huffman library.
    HuffmanDecodeToBytes(inFile, bInFile, &ht);

    // Check the handle the decoder produced, not the address of the
    // out-parameter (which can never be null at this point).
    if (ht == NULL || *bInFile == NULL){
        std::cout<<" ht is NULL o bb es NULL\n";
        exit(1);
    }
    return ht;
}

/**
 * Loads the three SDSL-serialised sequences that live next to the base path.
 *
 * @param path    Base path; the suffixes ".X.bin-wm_int.sdsl",
 *                ".B1-rrr-64.sdsl" and ".Y.bin.huff.bin-wm_int.sdsl" are
 *                appended to locate the files.
 * @param x_wm    Receives the X sequence.
 * @param b1_rrr  Receives the B1 bit vector.
 * @param y_wm    Receives the Y sequence.
 *
 * Terminates the process with exit(1) if any of the files cannot be loaded,
 * so callers never continue with a default-constructed (empty) structure.
 */
void readCompressed(const std::string path, sdsl::wm_int<sdsl::rrr_vector<15>> &x_wm,
    sdsl::rrr_vector<63> &b1_rrr,
    sdsl::wm_int<sdsl::rrr_vector<15>> &y_wm)
{
    // Paths to the serialised sequences
    const std::string xPath = path + ".X.bin-wm_int.sdsl";
    const std::string b1Path = path + ".B1-rrr-64.sdsl";
    const std::string yPath = path + ".Y.bin.huff.bin-wm_int.sdsl";

    // load_from_file reports failure through its boolean result; check it
    // instead of silently carrying on with empty data.
    if (!sdsl::load_from_file(x_wm, xPath)){
        std::cerr << " cannot read X file " << xPath << "\n";
        exit(1);
    }
    if (!sdsl::load_from_file(b1_rrr, b1Path)){
        std::cerr << " cannot read B1 file " << b1Path << "\n";
        exit(1);
    }
    if (!sdsl::load_from_file(y_wm, yPath)){
        std::cerr << " cannot read Y file " << yPath << "\n";
        exit(1);
    }
}

void getSeq(sdsl::wm_int<sdsl::rrr_vector<15>> &x_wm,
    sdsl::rrr_vector<63>::rank_1_type &b1_rank, sdsl::rrr_vector<63>::select_1_type &b1_select,
    huffman_node_t *ht, bit_file_t *bInFile,
    sdsl::wm_int<sdsl::rrr_vector<15>> &ywm,
    uint32_t ylen,
    std::vector<std::unordered_set<uint32_t>> &graph)
{
    // get neighbors by each partition
    uint32_t b1size = b1_rank.size();
    uint32_t ps = b1_rank(b1size) - 1;
    std::cerr<<" Total Partitions "<<ps<<" ylen "<<ylen<<" graph size "<<graph.size()<<"\n";
    std::vector<uint32_t> yRAM(ylen);
    #pragma omp parallel for simd
    for (uint32_t i=0; i<ylen; i++){
	yRAM[i] = ywm[i];
    }

    for (uint32_t partitionNumber = 1; partitionNumber < ylen; ++partitionNumber)
    {
        uint32_t partitionIndex = b1_select(partitionNumber);
        const uint32_t nextPartitionIndex = b1_select(partitionNumber + 1);
        uint32_t current_Y = yRAM[partitionNumber-1];
        uint32_t nextp_Y = yRAM[partitionNumber];

        const uint32_t psize = nextPartitionIndex - partitionIndex;
        std::vector<uint32_t> xRAM(psize), x2RAM(psize);
	#pragma omp parallel for simd
        for(uint32_t i = 0; i < psize; ++i)
        {
            uint32_t uno = x_wm[i+partitionIndex];
	    xRAM[i] = uno;
            x2RAM[i] = uno;
	}
  	int numb2 = 0;
	std::vector<unsigned char> b2RAM(nextp_Y-current_Y);
	unsigned char *ptr = b2RAM.data();
        huffman_node_t *htT = ht;
	bit_file_t *bInFileT;
        HuffmanBitFileRef(&bInFileT, bInFile);
	if(bInFileT == NULL){
		std::cout<<"ERROR bInFileT is null\n";
	}
	
	int ret = HuffmanDecodePartition(&ptr, current_Y, nextp_Y, &numb2, htT, bInFileT);
	uint32_t bytesPerNode = numb2/psize;
	if(bytesPerNode*psize != numb2){
		std::cout<<"ERROR ";
	    	std::cout<<" numb2 "<<numb2<<" partitionNumber "<<partitionNumber<<" psize "<<psize<<"\n";
		exit(1);
	}

        for(uint32_t xI1 = partitionIndex; xI1 < nextPartitionIndex; xI1++)
        {
              const uint32_t currentByteIndex = bytesPerNode * (xI1 - partitionIndex);
              for(uint32_t xI = partitionIndex; xI < nextPartitionIndex; ++xI)
              {
                const uint32_t b2xIbyteIndex = bytesPerNode * (xI - partitionIndex);
	    	for(uint32_t bytesChecked = 0; bytesChecked<bytesPerNode; ++bytesChecked){
              		const uint8_t maskByteOfCurrent = (uint8_t) b2RAM[currentByteIndex + bytesChecked];
                	const uint8_t maskBytePossibleNeighbor = (uint8_t) b2RAM[b2xIbyteIndex + bytesChecked];

                        if(maskByteOfCurrent & maskBytePossibleNeighbor)
                        {
                            	graph[xRAM[xI1-partitionIndex]].insert(x2RAM[xI-partitionIndex]);
			}
		}
	      }
	 }
    }


    for (uint32_t partitionNumber = ylen; partitionNumber <= ps; ++partitionNumber)
    {
        uint32_t partitionIndex = b1_select(partitionNumber);
        const uint32_t nextPartitionIndex = b1_select(partitionNumber + 1);
	
   	const uint32_t psize = nextPartitionIndex - partitionIndex;
        std::vector<uint32_t> xRAM(psize), x2RAM(psize);
	#pragma omp parallel for simd
        for(uint32_t i = 0; i < psize; ++i)
        {
            uint32_t uno = x_wm[i+partitionIndex];
            xRAM[i] = uno;
	    x2RAM[i] = uno;
        }
	
        #pragma omp parallel for simd 
        for (uint32_t xI = 0; xI < psize; xI++)
        {
            for (uint32_t cn = 0; cn < psize; cn++)
            {
            		graph[xRAM[xI]].insert(x2RAM[cn]);
	    }
        }

    }


}

/**
 * Entry point: rebuilds a graph from its compressed representation and
 * reports the reconstruction time and the number of edges.
 *
 * Usage: <program> RUTA_BASE NODES
 *   RUTA_BASE  base path of the compressed files
 *   NODES      number of nodes of the graph (positive integer)
 *
 * @return 0 on success, -1 when the arguments are missing or NODES is not a
 *         positive integer.
 */
int main(int argc, char const *argv[])
{
    if(3 > argc)
    {
        std::cerr << "Modo de uso: " << argv[0] << " RUTA_BASE NODES " << std::endl;
        return -1;
    }

    const std::string path(argv[1]);

    // Reject zero/negative/non-numeric counts before they are turned into an
    // unsigned size for the adjacency vector.
    const int requestedNodes = atoi(argv[2]);
    if (requestedNodes <= 0)
    {
        std::cerr << "NODES must be a positive integer, got '" << argv[2] << "'" << std::endl;
        return -1;
    }
    const uint32_t totalNodes = static_cast<uint32_t>(requestedNodes);
    totNodes = totalNodes;

    // Containers for the compressed sequences
    sdsl::wm_int<sdsl::rrr_vector<15>> x_wm;
    sdsl::rrr_vector<63> b1_rrr;
    sdsl::wm_int<sdsl::rrr_vector<15>> y_wm;

    // Read the compressed sequences
    bit_file_t *bInFile = NULL;
    huffman_node_t *ht = readCompressedHuffB2(path, &bInFile);
    readCompressed(path, x_wm, b1_rrr, y_wm);

    std::vector<std::unordered_set<uint32_t>> graph(totalNodes);
    sdsl::rrr_vector<63>::rank_1_type b1_rank(&b1_rrr);
    sdsl::rrr_vector<63>::select_1_type b1_select(&b1_rrr);

    // Only the reconstruction itself is timed.
    const std::chrono::high_resolution_clock::time_point start_time = std::chrono::high_resolution_clock::now();

    getSeq(x_wm, b1_rank, b1_select, ht, bInFile, y_wm, y_wm.size(), graph);

    const std::chrono::high_resolution_clock::time_point stop_time = std::chrono::high_resolution_clock::now();

    const auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(stop_time - start_time).count();

    std::cerr << "Time Reconstruction : " << duration << " [ms]" << std::endl;
    BitFileGetBitAllClean(bInFile);

    // Count adjacency entries, leaving out self-loops. A 64-bit counter is
    // used because the total can exceed the 32-bit range on large graphs.
    uint64_t tedges = 0;
    for (uint32_t i = 0; i < graph.size(); i++)
    {
        tedges += graph[i].size();
        if (graph[i].find(i) != graph[i].end())
        {
            tedges--;
        }
    }
    std::cerr << "time/edges: " << duration / static_cast<double>(tedges) << " [ms]" << std::endl;

    std::cout << " graph size "<<graph.size() << " total edges "<<tedges<<std::endl;
    return 0;
}
