#include "/home/andrea/prot/C/lib/Include_basic.h"

// Number of positions per site record; set by read_site() from the second
// integer on the header line of file_seq.
int LENGTH = 0;
// Number of site records; set by read_site() from the first integer on the
// header line of file_seq.
int NSITE = 0;

// average_linkage() writes the current cluster list to its output file only
// while fewer than this many clusters remain.
int NMIN_CLUSTER = 50; // for output
// Early-exit threshold of the closest-pair search in average_linkage(): the
// scan stops as soon as the running minimum drops below this value.
double MIN_DISTANCE = 0.05;

// NOTE(review): these three thresholds are not referenced anywhere in the
// active code of this file.
double DISTANCE1 = 0.5;
double DISTANCE2 = 1.0;
double DISTANCE3 = 1.5;

// Input file names: the matrix loaded into imat, and the site records.
char file_map[100]="bsp_imap.dat";
char file_seq[100]="bsp_seq.dat";

// Matrix loaded from file_map by load_imat() in main(); distance_site()
// reads its entries as imat.mat[u][v].
struct imat imat;

// List of integer indices: v holds n valid entries.
// evaluate_distance() fills it with the positions j of a site for which
// vec[j] == 2.
struct set{
  int n;
  int *v;
};

// One record read from file_seq by read_site().
//   G   - second field of the record line (parsed with %lf).
//   vec - LENGTH integers, one per character of the record's third field
//         (stored as character - '0').
//   set - positions j with vec[j] == 2, built later by evaluate_distance().
// SITE is the array of NSITE records allocated by read_site().
struct site{
  double G;
  int    *vec;
  struct set set;
} *SITE;

// NSITE x NSITE matrix of site-to-site distances; allocated (alloc_dmat) and
// filled symmetrically, with a zero diagonal, by evaluate_distance().
double  **dmap;


// List of n integer entries in v.
// NOTE(review): CLUSTER is never allocated in this file; main() passes it to
// average_linkage(), which does not use it.
struct cluster{
  int n;
  int *v;
} *CLUSTER;

/*******************/
int main(int argc, char *argv[])
{
  void read_site();
  void evaluate_distance(struct site *site);
  void average_linkage(struct site * site, struct cluster *cluster);

  printf("Read from %s!\n", file_seq);
  read_site();
  printf("%d %d\n",NSITE,LENGTH);

  printf("Read from %s!\n", file_map);
  load_imat(&imat, file_map);

  printf("Evaluate distance!\n");
  evaluate_distance(SITE);

  printf("Average linkage\n");
  average_linkage(SITE, CLUSTER);
}

/*
  READ SITE
*/
/*
 * Read the site records from file_seq into the global SITE array.
 *
 * File layout as parsed here:
 *   line 1      : "<NSITE> <LENGTH>"
 *   other lines : "<int> <G> <string>", where the string must provide at
 *                 least LENGTH characters; character j is stored as
 *                 SITE[n].vec[j] = character - '0'.
 *
 * Sets NSITE and LENGTH, allocates SITE and every SITE[i].vec.
 * Blank lines are skipped. On any inconsistency (unreadable or invalid
 * header, malformed record, record string shorter than LENGTH, more or fewer
 * records than announced) a message is printed and the program terminates
 * with a non-zero exit status.
 */
void read_site()
{
  char header[1000];

  FILE *fp = fopen_read(file_seq);
  // NOTE(review): fopen_read may already abort on failure; the check is
  // harmless if it does.
  if(fp == NULL){
    printf("File not good\n");
    exit(1);
  }

  // The header must yield two positive integers, otherwise nothing below
  // can be sized correctly.
  if(fgets(header, sizeof header, fp) == NULL
     || sscanf(header, "%d %d", &NSITE, &LENGTH) != 2
     || NSITE <= 0 || LENGTH <= 0){
    printf("File not good\n");
    fclose(fp);
    exit(1);
  }

  SITE = new struct site[NSITE];
  for(int i=0; i<NSITE; i++){
    SITE[i].vec = new int[LENGTH];
  }

  // A record line carries two numbers plus LENGTH characters, so the line
  // buffer is sized from LENGTH. The token buffer has the same capacity as
  // the line buffer, hence "%s" can never write past its end.
  const int capacity = LENGTH + 1000;
  char *line = new char[capacity];
  char *str  = new char[capacity];
  int n1;
  int n=0;

  while(fgets(line, capacity, fp)){
    double g;
    int nfield = sscanf(line, "%d %lf %s", &n1, &g, str);
    if(nfield == EOF){
      continue;               // blank line: nothing to convert
    }
    if(n >= NSITE){
      printf("ERRORE Length: %d n: %d\n", LENGTH, n);
      exit(1);
    }
    if(nfield != 3){
      printf("ERRORE record %d: expected 3 fields, found %d\n", n+1, nfield);
      exit(1);
    }

    SITE[n].G = g;
    for(int j=0; j<LENGTH; j++){
      if(str[j] == '\0'){
        // String ended early: the remaining positions would be garbage.
        printf("ERRORE record %d: string shorter than Length %d\n", n+1, LENGTH);
        exit(1);
      }
      SITE[n].vec[j] = str[j] - '0';
    }
    n++;
  }

  if(n != NSITE){
    // Fewer records than announced would leave SITE entries uninitialised.
    printf("ERRORE expected %d records, read %d\n", NSITE, n);
    exit(1);
  }

  delete[] str;
  delete[] line;
  fclose(fp);
}

/********************************************************
 *********************   DISTANCE   *********************
 *******************************************************/

void evaluate_distance(struct site *site)
{
  double distance_site( struct site *site1, struct site *site2);

  for(int i=0; i< NSITE; i++){
    //printf("%d %x\n", i+1, (int)site+i );
    site[i].set.v = new int[LENGTH];
    site[i].set.n = 0;
    for(int j=0; j<LENGTH; j++){
      //printf("%d\n",site[i].vec[j]);
      if(site[i].vec[j] == 2){
	site[i].set.v[site[i].set.n] = j;
	site[i].set.n++;
      }
    }
  }

  dmap = alloc_dmat(NSITE, NSITE);

  for(int i=0; i< NSITE; i++){
    //printf("%4d %6.2f\n",i+1, dmap[i][0]);
    dmap[i][i] = 0.0;
    for(int j=i+1; j<NSITE; j++){
      dmap[i][j] = dmap[j][i] = (distance_site( site+i, site+j)
	+ distance_site( site+j, site+i))/2.0;
    }
  }

  FILE *fp = fopen_write("bsp_dmap.dat");
  fprintf(fp,"%d %d\n",NSITE, NSITE);
   for(int i=0; i< NSITE; i++)
    for(int j=i+1; j<NSITE; j++)
      fprintf(fp, "%4d %4d %6.2f\n",i+1, j+1, dmap[i][j]);
  fclose(fp);
}

// Directed distance from site1 to site2.
//
// For every residue listed in site1->set the smallest imat.mat[u][v] towards
// any residue listed in site2->set is taken; a residue that belongs to both
// sets contributes 0. The result is the average of these minima over the
// residues of site1. It is therefore not symmetric; evaluate_distance()
// averages both directions.
//
// Original author's note on the residue metric: the distance between
// residues is 1 if they are nearest neighbors, 2 if they are second nearest
// neighbors, ...
//
// The value 999 is the "no neighbour found" sentinel: a residue of site1 gets
// it when site2 has no residues. The same value is returned when site1 itself
// has no residues, instead of dividing by zero and returning NaN (which would
// defeat the comparisons in the closest-pair search of average_linkage()).
double distance_site( struct site *site1, struct site *site2)
{
  const int no_neighbour = 999;

  if(site1->set.n <= 0){
    return (double) no_neighbour;
  }

  double dist = 0.0;
  for(int i=0; i<site1->set.n; i++){
    int u = site1->set.v[i];
    int min = no_neighbour;
    for(int j=0; j<site2->set.n; j++){
      int v = site2->set.v[j];
      if( u==v ){
        // Shared residue: nothing can be closer, stop scanning site2.
        min=0;
        break;
      }
      else if(imat.mat[u][v]< min){
        min = imat.mat[u][v];
      }
    }
    dist += min;
  }
  dist /= (double) site1->set.n;

  return dist;
}

/*******************************************************
CLUSTERING
******************************************************/

// Node of the linked list that represents one cluster.
//   next - following member of the cluster, NULL at the end of the list.
//   prev - set by merge_cluster() on the first node of an appended list;
//          not read anywhere in the active code of this file.
//   key  - site index; used as row/column index into dmap.
struct ll{
  struct ll *next;
  struct ll *prev;
  int key;
};

// Current state of the clustering.
//   n        - number of clusters still present.
//   distance - distance of the pair chosen in the most recent merge step
//              (0.0 before the first merge); printed by printf_ll().
//   v        - v[0..n-1] point to the first node of each cluster's list.
struct head{
  int n;
  double distance;
  struct ll **v;
};

void average_linkage(struct site * site, struct cluster *cluster)
{
  void printf_ll(struct head head, FILE *fp);
  double distance_cluster(struct ll *l1, struct ll *l2);
  void merge_cluster(struct ll *l1, struct ll *l2);
  //void fprint_cluster(FILE *fp, struct cluster *cluster);


  struct ll ll[NSITE];
  struct head head;


  head.n = NSITE;
  head.v = new struct ll*[NSITE];


  for(int i=0; i<NSITE; i++){
    head.v[i] =  ll+i;
    ll[i].prev = NULL;
    ll[i].next = NULL;
    ll[i].key = i;
  }

  printf("I'm here %d\n", head.n);

  FILE *fp  = fopen_write("bsp_cluster.dat");
  FILE *fp2 = fopen_write("bsp_cluster2.dat");

  head.distance = 0.0;
  double distance;
  while( head.n > 1){
    if(head.n < NMIN_CLUSTER)      printf_ll(head, fp);
    double min_distance = 999.0;
    int k1 = 0;
    int k2 = 1;
    for(int i=0; i<head.n; i++){
      for(int j=i+1; j<head.n; j++){
	distance = distance_cluster(head.v[i], head.v[j]);
	if(min_distance> distance){
	  min_distance = distance;
	  k1 = i;
	  k2 = j;
	}
	if(min_distance < MIN_DISTANCE)  break;
      }
      if(min_distance < MIN_DISTANCE)  break;
    }
    head.distance = min_distance;
    printf("# Nclsuter %5d  distance  %f  object to merge (%d-%d)\n", head.n, min_distance,k1,k2);
    fprintf(fp2, "%5d  %f\n", head.n, min_distance);
    merge_cluster(head.v[k1],head.v[k2]);
    head.n--;
    for(int k = k2; k<head.n; k++){ 
      head.v[k] = head.v[k+1];
    }
    
  }
  fclose(fp);
  fclose(fp2);
}

void printf_ll(struct head head, FILE *fp)
{
  struct ll *p1;
  fprintf(fp, "NO-CLUSTERS: %5d\n",head.n);
  fprintf(fp, "DISTANCE: %10.4f\n",head.distance);
  for(int i=0; i<head.n; i++){
    p1 = head.v[i];
    fprintf(fp, "  %3d -> %d", i+1, p1->key);
    while(p1->next != NULL){
      p1=p1->next;
      fprintf(fp, " %d",  p1->key);
    }
    fprintf(fp, "\n");
  }
  fprintf(fp, "@\n");
}

/*
 * Average-linkage distance between two clusters: the mean of dmap[a][b] over
 * every pair (a in list l1, b in list l2).
 */
double distance_cluster(struct ll *l1, struct ll *l2)
{
  double sum = 0.0;
  int pairs = 0;

  struct ll *a = l1;
  while(a != NULL){
    struct ll *b = l2;
    while(b != NULL){
      pairs++;
      sum += dmap[a->key][b->key];
      b = b->next;
    }
    a = a->next;
  }

  sum /= (double) pairs;
  return sum;
}

/*
 * Append the list starting at l2 to the end of the list starting at l1.
 * l1 must be non-NULL. After the call l1 heads the combined list and
 * l2->prev points to the node that now precedes it, i.e. the former last
 * node of l1 (the original code stored l1 itself, which is only correct
 * when l1 has a single node).
 */
void merge_cluster(struct ll *l1, struct ll *l2)
{
  // Walk to the last node of the first list.
  struct ll *tail = l1;
  while(tail->next != NULL){
    tail = tail->next;
  }

  tail->next = l2;
  l2->prev = tail;
}




/**************************************** 
END OF THE ACTIVE PROGRAM: everything below this point is disabled draft code
*****************************/


/*************************************************************

               BINARY TREE
*************************************************************/
/**********************************************************

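Unfinished draft (disabled): binary-tree version of the average-linkage
clustering. It does not compile as written.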


struct node{
  struct node *right;
  struct node *left;
  struct node *father;
  int distance;
  int nkeys;
  int *keys;
}

struct head2{
  int n;
  struct node **v;
};


void average_linkage(struct site * site, struct cluster *cluster)
{
  void printf_ll(struct head head, FILE *fp);
  double distance_cluster(struct ll *l1, struct ll *l2);
  void merge_cluster(struct ll *l1, struct ll *l2);
  //void fprint_cluster(FILE *fp, struct cluster *cluster);



  struct head head; 
  struct node node1[NSITE];
  struct node node2[NSITE];
  int nnode2 = 0;

  head.n = NSITE;
  head.v = new struct node*[NSITE];

  for(int i=0; i<NSITE; i++){
    node1[i].nkeys = 1;
    node1[i].key = new int[1];
    node1[i].key = i;
    head2.v[i] = node +i;
  }

  printf("I'm here %d\n", head.n);

  FILE *fp  = fopen_write("bsp_cluster.dat");
  FILE *fp2 = fopen_write("bsp_cluster2.dat");

  head.distance = 0.0;
  double distance;
  while( head.n > 1){
    if(head.n < NMIN_CLUSTER)      printf_ll(head, fp);
    double min_distance = 999.0;
    int k1 = 0;
    int k2 = 1;
    for(int i=0; i<head.n; i++){
      for(int j=i+1; j<head.n; j++){
	distance = distance_cluster(head.v[i], head.v[j]);
	if(min_distance> distance){
	  min_distance = distance;
	  k1 = i;
	  k2 = j;
	}
	if(min_distance < MIN_DISTANCE)  break;
      }
      if(min_distance < MIN_DISTANCE)  break;
    }
    head.distance = min_distance;
    printf("# Nclsuter %5d  distance  %f  object to merge (%d-%d)\n", head.n, min_distance,k1,k2);
    merge_cluster(head.v[k1],head.v[k2]);
    
    head.v[k1] = node2[nnode2];
    merge(node2[nnode2], head.v[k1], head.v[k2]);
    nnode2++;
    head.n--;
    head.v[k2] = head.v[head.n];    
  }
  fclose(fp);
  fclose(fp2);
}

void print_tree(struct node *node)
{
  printf("(");
  if(node->nkeys == 1){
    printf("%d",node->keys);
  }
  else{
    print_tree(node->left);
    print_tree(node->right);
  }
}

void printf_ll(struct head head, FILE *fp)
{
  struct node *node;
  fprintf(fp, "NO-CLUSTERS: %5d\n",head.n);
  fprintf(fp, "DISTANCE: %10.4f\n",head.distance);

  for(int i=0; i<head.n; i++){
    fprintf(fp, "###  %3d -> ", i+1);
    for(int j=0; j<node->nkeys; j++){
      fprintf(fp, " %d",  node->keys[j]);
    }
    fprintf(fp, "\n");
  }
  fprintf(fp, "@\n");
}

double distance_cluster(struct node *n1, struct node *n2)
{
  int ncount=0;
  double distance =0.0;

  for(int i=0; i<n1->nkeys; i++){
    for(int j=0; j<n2->nkeys; j++){
      distance += dmap[n1->keys[i]][n2->keys[j]];
    }
  }

  return (distance/(double)ncount);
}

void merge_cluster(struct node *node, struct node *n1, struct node *n2)
{
  node->left  = n1;
  node->right = n2;

  node->nkeys = n1->nkeys + n2->keys;

  node->keys = new int[node->nkeys];

  int nkeys=0;
  for(int i=0; i<n1->nkeys; i++){
    node->keys[nkeys++] = n1->keys[i]; 
  }
  for(int i=0; i<n2->nkeys; i++){
    node->keys[nkeys++] = n2->keys[i]; 
  }
  if(nkeys != node->nkeys){
    fprintf(stderr, "Errore nkeys %d %d\n", nkeys, node->nkeys);
    exit(0);
  }
}

******************************************/
