#include <assert.h>
#include <cosmo.h>
#include <pthread.h>
#include <stdatomic.h>

// Number of hit() calls made by each worker thread (type long).
#define ITERATIONS 10000000L

// Sharded counter: COSMO_SHARDS independent slots whose sum is the
// logical value. Each slot is aligned to 64 bytes, which also pads the
// element to a multiple of 64 bytes, so no two slots share a 64-byte
// region (presumably to keep shards on separate cache lines -- confirm
// for the target CPU). File-scope storage, hence zero-initialized.
//
// _Alignas is the C11 keyword; the alignas spelling is only a macro
// from <stdalign.h> before C23, and that header is not included here.
struct {
  _Alignas(64) atomic_long x;
} counter[COSMO_SHARDS];

// Adds one to the counter slot selected by cosmo_shard().
//
// The increment is atomic; relaxed ordering means it imposes no
// ordering on surrounding memory accesses.
void hit(void) {
  atomic_long *slot = &counter[cosmo_shard()].x;
  atomic_fetch_add_explicit(slot, 1, memory_order_relaxed);
}

// Returns the sum of all COSMO_SHARDS counter slots.
//
// Each slot is read with its own relaxed load, so the result is not an
// atomic snapshot if hit() is running concurrently.
long count(void) {
  long total = 0;
  long shard = 0;
  while (shard < COSMO_SHARDS) {
    total += atomic_load_explicit(&counter[shard].x, memory_order_relaxed);
    ++shard;
  }
  return total;
}

// Thread entry point: calls hit() ITERATIONS times.
//
// @param arg ignored
// @return always a null pointer
void *worker(void *arg) {
  (void)arg;
  long remaining = ITERATIONS;
  while (remaining-- > 0)
    hit();
  return 0;
}

// Hammers the sharded counter from one worker thread per CPU reported
// by cosmo_cpu_count(), then checks that no increment was lost.
//
// @return 0 on success, 1 if a worker thread could not be created or
//     joined; a wrong total trips the assert
int main(void) {
  int threads = cosmo_cpu_count();
  // A VLA length must be positive, so fall back to a single worker if
  // the reported CPU count is unusable.
  if (threads < 1)
    threads = 1;
  pthread_t th[threads];

  // Stop at the first failed pthread_create(): POSIX leaves the thread
  // id undefined on failure, so only th[0..started) may be joined.
  long started = 0;
  while (started < threads && !pthread_create(&th[started], 0, worker, 0))
    ++started;

  int failed = started != threads;
  for (long i = 0; i < started; ++i)
    if (pthread_join(th[i], 0))
      failed = 1;
  if (failed)
    return 1;

  // Every worker has been joined, so all of their increments are
  // visible to this thread.
  assert(count() == threads * ITERATIONS);
  return 0;
}