/* This is the size ubenchmark. We touch consecutive nodes until we run out of
 * room in hardware
 */
#include "stdio.h"

//possible values: TRANS_OFF, TRANS, LOCKS, STM, HSTM
#define HSTM
#define SGILOCKS
#define CHECK_OFF

#ifdef OMPLOCKS
#include "omp.h"
#endif

#define NTHREADS 1
#define NNODES 5000
#define NITER 4900
#define STARTVAL NNODES
#define RANDSEED 50
#define CLINESIZE 128
#define FAILONATTEMPT 2

#define BACKOFF_MIN 32
#define FLAG 9999999

#define COMMITTED 1
#define PENDING   0

#define start_trans(obj, reader) \
reader.next = (*obj).readers; \
(*obj).readers = &reader;

#define first_read(a, obj) \
if ((*obj).val == FLAG) { \
  if ((*(*(*obj).versions).tmtag) == PENDING) {\
    printf ("this should not happen\n"); \
  } else {\
    (*obj).val = (*(*obj).versions).val;\
  }\
}\
a = (*obj).val;

//writing a to fld
#define first_write(a, obj, tmtag, tmpobj) \
if ((*obj).val == FLAG) \
  printf ("this should not happen\n"); \
tmpobj = (node_t *)malloc(sizeof(node_t)); \
(*tmpobj).versions = (*obj).versions; \
(*obj).versions = tmpobj; \
(*tmpobj).tmtag = tmtag; \
(*obj).val = FLAG; \
(*tmpobj).val = a; 


#ifdef OMPLOCKS
#define lock_t omp_lock_t
#define lock(l) omp_set_lock(l)
#define release(l) omp_unset_lock(l)
#define init(l) omp_init_lock(l)
#endif

#ifdef SGILOCKS
#define lock_t int
#define lock(l) while (__lock_test_and_set(l, 1) != 0)
#define release(l) __lock_release(l)
#define init(l) (*l = 0)
#endif

typedef unsigned long long uint64;

typedef int tmtag_t;

typedef struct reader_struct {
  tmtag_t *tmtag;
  struct reader_struct *next;
} reader_t;

typedef struct node_struct {
  struct node_struct *versions;
  tmtag_t *tmtag;
  reader_t *readers;
  int val;
} node_t;

lock_t *nodeLocks, totaltLock;
int totalt, lockspacing, nodespacing;
node_t *nodes, *nodes1;
//int *randlist1[NTHREADS];
//int *randlist2[NTHREADS];
tmtag_t tmtaglist[NNODES];

#define slrand(new, prev) new = (16807 * prev) % 2147483647

void main() {

  int n,m,sum;

  putenv("_DSM_VERBOSE=1");
  putenv("OMP_DYNAMIC=FALSE");
  putenv("MP_SUGNUMTHD=OFF");
  putenv("MPC_SUGNUMTHD=OFF");
  putenv("MPC_GANG=OFF");
  //putenv("OMP_NUM_THREADS=2");
  omp_set_num_threads(NTHREADS);

  srand(RANDSEED);

  nodeLocks = (lock_t *)malloc(NNODES*CLINESIZE);
  lockspacing = CLINESIZE/sizeof(lock_t);

  nodes  = (node_t *)malloc(NNODES*CLINESIZE);
  nodes1 = (node_t *)malloc(NNODES*CLINESIZE);
  nodespacing = CLINESIZE/sizeof(node_t);

  //init the nodes
  for (n=0; n<NNODES; n++) {
    node_t *node, *node1;

    node = &nodes[n*nodespacing];
    node1 = &nodes1[n*nodespacing];
    
    (*node).val = STARTVAL;
    (*node1).val = STARTVAL;
    (*node).versions = node1;
    (*node1).versions = NULL;
    (*node).tmtag = NULL;
    (*node1).tmtag = NULL;
    (*node).readers = NULL;
    (*node1).readers = NULL;

    init(&nodeLocks[n*lockspacing]);
  }
  
  totalt = 0;
  init(&totaltLock);

  //init the random numbers
  //it takes too long to call rand in the loop.
  
  /*
  for (n=0; n<NTHREADS; n++) {
    randlist1[n] = (int*)malloc(NITER*sizeof(int));
    randlist2[n] = (int*)malloc(NITER*sizeof(int));
  }

  
  for (n=0; n<NTHREADS; n++) {
    for (m=0; m<NITER; m++) {
      randlist1[n][m] = rand()%NNODES;
      do {
	randlist2[n][m] = rand()%NNODES;
      } while (randlist2[n][m] == randlist1[n][m]);
    }
  }
  */

  #pragma omp parallel shared (totalt, totaltLock)
  {
    int k,i,start,end,pid,delay,tmp,attempt;
    #ifdef LOCKS
    int l1,l2,t;
    #endif
    reader_t reader;

    tmtag_t *tmtag;
    node_t *node, *tmpnode;
    
    uint64 seed,n1,n2;

    pid = omp_get_thread_num();
    seed = pid+1;
    
    //mp_barrier();
    start = sysclocks();

      #ifdef TRANS
	xbegin();
	for (k=0; k<NITER; k++) {
	  n1 = k*nodespacing;
	  nodes[n1].val += 1;
	}
	xend();
      #endif
	
      #ifdef HSTM
	attempt = 0;
	xbegin();
	nxbegin();
	attempt++;
	nxend();
	if (attempt != FAILONATTEMPT) {
	  for (k=0; k<NITER; k++) {
	    n1 = k*nodespacing;
	    if (nodes[n1].val == FLAG || nodes[n1].readers) {
	      xabort();
	    }	  
	    nodes[n1].val += 1;
	  }
	  xend();
	} else {
	  xend();
	  tmtag = &tmtaglist[0];
	  *tmtag = PENDING;
	  reader.tmtag = tmtag;
	  for (k=0; k<NITER; k++) {
	    n1 = k*nodespacing;
	    node = &nodes[n1];
	    start_trans(node, reader); 
	    first_read(tmp, node);
	    tmp++;
	    first_write(tmp, node, tmtag , tmpnode);	  
	  }
	  *tmtag = COMMITTED;
	}
      #endif

      #ifdef STM
	tmtag = &tmtaglist[0];
	*tmtag = PENDING;
	reader.tmtag = tmtag;
	for (k=0; k<NITER; k++) {
	  n1 = k*nodespacing;
	  node = &nodes[n1];
	  start_trans(node, reader); 
	  first_read(tmp, node);
	  tmp++;
	  first_write(tmp, node, tmtag , tmpnode);	  
	}
	*tmtag = COMMITTED;
      #endif
      #ifdef SCHTM
        attempt = 0;
        xbegin();
        nxbegin();
        attempt++;
        nxend();
        if (attempt != FAILONATTEMPT) {
	  //nxbegin();
          for (k=0; k<NITER; k++) {
	    //nxend();
            n1 = k*nodespacing;
            if (nodes[n1].val == FLAG || nodes[n1].readers) {
              xabort();
            }  
            nodes[n1].val += 1;
	    //nxbegin();
	  }
	  //nxend();
          xend();
        } else {
          xend();
       	}
      #endif

    end = sysclocks();
   
    printf ("k=%d\n", k);
 
    lock(&totaltLock);
    totalt += (end-start);
    release(&totaltLock);
    
    //mp_barrier();
  }

  printf("total clock cycles %d\n", totalt);
  printf("cycles per update  %d\n", totalt/(NTHREADS*NITER));

  #ifdef CHECK_OFF
  return;
  #endif
  
  //check the nodes
  sum = 0;
  for (n=0; n<NNODES; n++) {
    if (nodes[n*nodespacing].val == FLAG) {
      sum+=(*nodes[n*nodespacing].versions).val;
    } else {
      sum += nodes[n*nodespacing].val;
    }
  }
  if (sum == STARTVAL*NNODES) {
    printf ("Check PASSED!\n");
  } else {
    printf ("Check FAILED: sum=%d expected=%d!\n",sum, STARTVAL*NNODES);
  }

}

