Skip to content
This repository has been archived by the owner on Mar 21, 2021. It is now read-only.

Commit

Permalink
Very big size update
Browse files Browse the repository at this point in the history
  • Loading branch information
Henriquelay committed Mar 15, 2021
1 parent fc67b3d commit a80183b
Show file tree
Hide file tree
Showing 12 changed files with 231 additions and 63 deletions.
2 changes: 2 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ $(BUILD_DIR)/%.c.o: %.c
clean:
$(RM) */*.o */*.d

run: $(TARGET_EXEC)
$(BUILD_DIR)/$(TARGET_EXEC) $(ARGS)

valzin: $(TARGET_EXEC)
$(VALGRIND) $(VALZIN_FLAGS) $(FLAGS) $(BUILD_DIR)/$(TARGET_EXEC) $(ARGS)
Expand Down
15 changes: 6 additions & 9 deletions lib/data.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,27 +14,24 @@
#ifndef _DATA_H_
#define _DATA_H_

#define _GNU_SOURCE // Needed to use qsort_r when not using c11 or gnu89/gnu99
#include <math.h>

#include "./fileReader.h"
#include "./unionFind.h"
#include "./file.h"

// "Rows"
/**
 * One input row: a labelled point in feature space.
 */
typedef struct sample_t {
    char* id;               // Label of the row (loadData stores the first token of the line here)
    long double* features;  // Feature values; loadData allocates nFeatures of them per sample
    size_t index;           // Position of this sample inside the owning dataSet_t.samples array
} sample_t;

// "Lines" of rows
typedef struct dataSet {
typedef struct dataSet_t {
size_t nElements;
size_t nFeatures;
sample_t* samples;
} dataSet_t;


// Allocates a dataSet_t whose samples array has room for nElements entries.
dataSet_t* initDataSet(size_t nFeatures, size_t nElements);
// Builds a dataSet_t from `file`, parsing each line's fields (split on `separator`) with readLine.
dataSet_t* loadData(FILE* file, const char* separator);
// Prints the sample id followed by its *nFeatures feature values on one line.
void printSample(const sample_t* sample, const size_t* nFeatures);
void printDataSet(dataSet_t* dataSet);
// Releases the storage owned by one sample.
void destroySample(sample_t* sample);
Expand Down
31 changes: 31 additions & 0 deletions lib/distances.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
/**
* This library handles distance calculations and related matters.
**/

#ifndef _DISTS_H_
#define _DISTS_H_

#include <math.h>

#include "./data.h"

// "Rows"
// One pairwise record: the distance between two samples of a dataSet_t.
typedef struct distanceSample_t {
// calculateDistances does not store a label here.
// NOTE(review): appears unused by the visible code — confirm before reading it.
char* id;
// Euclidean distance between `from` and `to`, filled in by calculateDistances
long double distance;
// Endpoints of the pair. They point into the samples array of the dataSet_t given to
// calculateDistances (no copies are made), so that dataSet_t must outlive this record.
sample_t *from, *to;
} distanceSample_t;

// "Lines" of rows
// Flat array of pairwise distance records built from a dataSet_t.
typedef struct distanceDataSet_t {
// Number of distanceSample_t entries allocated in `samples` (set by initDistanceDataSet)
size_t nElements;
// Number of samples in the dataSet_t the distances were computed from
size_t depth;
// The pair records; calculateDistances fills them in (i, j) order with j < i
distanceSample_t* samples;
} distanceDataSet_t;

// Allocates a distance set and fills it with the Euclidean distance of every pair (i, j), j < i,
// of `points`' samples. Prints one progress line per pair. The records keep pointers into
// `points`, so `points` must stay alive while the result is used.
// Release the result with destroyDistanceDataSet.
distanceDataSet_t* calculateDistances(dataSet_t* points);
// Prints the set as a lower-triangular table: one row per sample, distances separated by tabs.
void printDistanceSet(distanceDataSet_t* dataSet);
// Frees the records array and the set itself; the samples the records point to are not freed.
void destroyDistanceDataSet(distanceDataSet_t* dataSet);

#endif
File renamed without changes.
15 changes: 15 additions & 0 deletions lib/graph.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
/**
* This library handles dataSets as if they were graphs. Any graph algorithm should live here.
**/

#ifndef _GRAPH_H_
#define _GRAPH_H_

#define _GNU_SOURCE // Needed to use qsort_r when not using c11 or gnu89/gnu99

#include "./data.h"
#include "./unionFind.h"

// Sorts dataSet->samples by ascending distance IN PLACE, then walks the sorted pairs calling
// UF_union on their endpoints until the number of groups reaches groupsNumber.
// Returns the union_t created by UF_init; main releases it with UF_destroy.
union_t* kruskal(distanceDataSet_t* dataSet, size_t groupsNumber);

#endif
12 changes: 6 additions & 6 deletions lib/unionFind.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,19 @@
#include <stdlib.h>
#include <stdio.h>

#include "./distances.h"

// Union-find state.
// NOTE(review): the UF_* implementations are not visible here; confirm field roles in unionFind.c.
typedef struct union_t {
// NOTE(review): presumably the parent/representative index of each element — confirm
size_t* array;
// Array of sizes to calculate weight and balance the insertion
size_t* size;
// Number of elements on array
size_t arraySize;
// Tie-in to elem (untyped payload pointer).
// NOTE(review): looks superseded by `sample` below — confirm and drop if unused.
void *elem;
// Distance records tied to this structure; kruskal passes dataSet->samples to UF_init.
// NOTE(review): presumably stored here by UF_init — confirm.
distanceSample_t *sample;
} union_t;

// Creates a union-find structure over `size` elements, tied to the given distance records.
union_t* UF_init(const size_t size, distanceSample_t* samples);
// Releases a structure created by UF_init.
void UF_destroy(union_t* unionStruct);
// Returns the representative of the group that `index` belongs to.
size_t UF_find(union_t* unionStruct, size_t index);
// Joins the groups of p and q. kruskal treats a return value of 1 as "a merge happened".
char UF_union(union_t* unionStruct, const size_t p, const size_t q);

#endif
35 changes: 20 additions & 15 deletions src/data.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,39 +2,40 @@

#define BUFSIZE 1500

dataSet_t* loadData(FILE* file, const char* separator) {
char buffer[BUFSIZE];
size_t bufferSize = BUFSIZE;

/**
 * Allocates a dataSet_t with room for nElements samples.
 *
 * Only the samples array is allocated. It is zero-initialised so every sample starts with
 * NULL id/features; filling them in is left to the caller (see loadData).
 * Prints an error and exits the process if an allocation fails.
 *
 * @param nFeatures number of feature values each sample will hold
 * @param nElements number of samples to reserve room for
 * @return the new data set; release it with destroyDataSet
 */
dataSet_t* initDataSet(size_t nFeatures, size_t nElements) {
    dataSet_t* dataSet = malloc(sizeof *dataSet);
    if (dataSet == NULL) {
        perror("Error allocating new dataSet. Exiting");
        exit(1);
    }

    dataSet->nFeatures = nFeatures;
    dataSet->nElements = nElements;
    // calloc checks nElements * size for overflow and leaves no indeterminate pointers behind.
    dataSet->samples = calloc(nElements, sizeof *dataSet->samples);
    // A NULL result is only an error when memory was actually requested.
    if (dataSet->samples == NULL && nElements > 0) {
        perror("Error allocating new samples. Exiting");
        exit(1);
    }
    return dataSet;
}

dataSet_t* loadData(FILE* file, const char* separator) {
char buffer[BUFSIZE];
size_t bufferSize = BUFSIZE;

dataSet_t* dataSet = initDataSet(getLineSize(file, *separator, buffer, &bufferSize), countLines(file));

// void** line = NULL;
for (size_t i = 0; i < dataSet->nElements; i++) {
char** line = readLine(file, separator, buffer, &bufferSize, &dataSet->nFeatures);
dataSet->samples[i].id = line[0];
dataSet->samples[i].index = i;
dataSet->samples[i].features = (long double*)malloc(sizeof(long double) * dataSet->nFeatures);
if (dataSet->samples == NULL) {
perror("Error allocating features for new sample. Exiting");
exit(1);
}
for (size_t j = 0; j < dataSet->nFeatures; j++) {
dataSet->samples[i].features[j] = strtold(line[j + 1], NULL);
free(line[j+1]);
free(line[j + 1]);
}
free(line);
}
Expand All @@ -43,9 +44,10 @@ dataSet_t* loadData(FILE* file, const char* separator) {
}

/**
 * Prints one sample on a single line: "<id>:" followed by each feature preceded by a tab.
 *
 * @param sample    sample to print
 * @param nFeatures pointer to the number of entries in sample->features
 */
void printSample(const sample_t* sample, const size_t* nFeatures) {
    printf("%s:", sample->id);
    for (size_t j = 0; j < *nFeatures; j++) {
        printf("\t%Lf", sample->features[j]);
    }
    puts("");
}
Expand All @@ -56,15 +58,18 @@ void printDataSet(dataSet_t* dataSet) {
}
}

// Releases the feature array of one sample. Does not free IDs: the caller owns them
// (main frees every id itself before destroying the data set).
void destroySample(sample_t* sample) {
    if (sample == NULL) {
        return;
    }
    free(sample->features);
    // Clear the field so a repeated call cannot double-free.
    sample->features = NULL;
}

// Releases every sample (via destroySample), the samples array and the set itself.
// Does not free IDs. Passing NULL is a no-op.
void destroyDataSet(dataSet_t* dataSet) {
    if (dataSet == NULL) {
        return;
    }
    for (size_t i = 0; i < dataSet->nElements; i++) {
        destroySample(&dataSet->samples[i]);
    }
    free(dataSet->samples);
    free(dataSet);
}
65 changes: 65 additions & 0 deletions src/distances.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
#include "../lib/distances.h"

/**
 * Allocates a distance set able to hold one record per unordered pair of nElements samples.
 *
 * `depth` is set to nElements and `nElements` to the pair count n * (n - 1) / 2. The records
 * are zero-initialised. Prints an error and exits the process if an allocation fails.
 *
 * @param nElements number of samples the distances will be computed from
 * @return the new distance set; release it with destroyDistanceDataSet
 */
distanceDataSet_t* initDistanceDataSet(size_t nElements) {
    distanceDataSet_t* dataSet = malloc(sizeof *dataSet);
    if (dataSet == NULL) {
        perror("Error allocating new dataSet. Exiting");
        exit(1);
    }
    dataSet->depth = nElements;

    // n * (n - 1) / 2 pairs. Halve whichever factor is even so the division is exact:
    // dividing n first truncates for odd n (3 samples would get 2 slots for 3 pairs).
    size_t pairs = 0;
    if (nElements > 1) {
        pairs = (nElements % 2 == 0) ? (nElements / 2) * (nElements - 1)
                                     : nElements * ((nElements - 1) / 2);
    }
    dataSet->nElements = pairs;

    dataSet->samples = calloc(pairs, sizeof *dataSet->samples);
    // A NULL result is only an error when memory was actually requested.
    if (dataSet->samples == NULL && pairs > 0) {
        perror("Error allocating new samples. Exiting");
        exit(1);
    }
    return dataSet;
}

long double euclidianDistance(long double* a, long double* b, size_t* nFeatures) {
long double accumulator = 0;

// SUM(abs(a - b)^2)
for (size_t i = 0; i < *nFeatures; i++) {
// printf("[dim = %ld Val a = %Lf b = %Lf]", *nFeatures, a[i], b[i]);

if (a[i] < b[i]) {
accumulator += (b[i] - a[i]) * (b[i] - a[i]);
} else {
accumulator += (a[i] - b[i]) * (a[i] - b[i]);
}
}
long double dist = sqrtl(accumulator);
// printf(" Dist: %Lf\n", dist);
return dist;
}

/**
 * Builds the set of pairwise Euclidean distances of a data set.
 *
 * Records are written in (i, j) order with j < i, i.e. row by row of the lower triangle.
 * Each record points at the two samples inside locationSet (no copies), so locationSet must
 * outlive the returned set. One progress line is printed per pair.
 *
 * @param locationSet samples to measure
 * @return newly allocated distance set; release it with destroyDistanceDataSet
 */
distanceDataSet_t* calculateDistances(dataSet_t* locationSet) {
    distanceDataSet_t* distanceSet = initDistanceDataSet(locationSet->nElements);

    size_t count = 0;
    for (size_t i = 0; i < locationSet->nElements; i++) {
        sample_t* from = &locationSet->samples[i];
        for (size_t j = 0; j < i; j++, count++) {
            sample_t* to = &locationSet->samples[j];
            distanceSample_t* pair = &distanceSet->samples[count];

            // No label is produced for a pair; keep the field determinate.
            pair->id = NULL;
            pair->from = from;
            pair->to = to;
            // %zu is the conversion for size_t; %ld is undefined behaviour where the widths differ.
            printf("Calculating distance from %zu to %zu, which are %s and %s\n", i, j, from->id, to->id);
            pair->distance = euclidianDistance(from->features, to->features, &locationSet->nFeatures);
        }
    }
    return distanceSet;
}

/**
 * Prints the distances as a lower-triangular table: one row per sample, "<id>:" followed by
 * the tab-separated distances to every earlier sample.
 *
 * Expects the records in the order calculateDistances wrote them (row by row); calling it
 * after kruskal has sorted the set gives meaningless rows.
 */
void printDistanceSet(distanceDataSet_t* dataSet) {
    for (size_t i = 0, k = 0; i < dataSet->depth; i++) {
        const char* rowId = NULL;
        if (i > 0) {
            // The first record of row i is the pair (i, 0), whose `from` is sample i.
            rowId = dataSet->samples[k].from->id;
        } else if (dataSet->nElements > 0) {
            // Row 0 owns no record; sample 0 is the `to` end of the very first pair (1, 0).
            rowId = dataSet->samples[0].to->id;
        }
        // With a single sample there is no pair to take the label from: print it empty.
        printf("%s:", rowId != NULL ? rowId : "");

        for (size_t j = 0; j < i; j++, k++) {
            printf("\t%Lf", dataSet->samples[k].distance);
        }
        puts("");
    }
}

// Frees the records array and the set itself. The samples the records point to belong to
// the originating dataSet_t and are not freed. Passing NULL is a no-op.
void destroyDistanceDataSet(distanceDataSet_t* dataSet) {
    if (dataSet == NULL) {
        return;
    }
    free(dataSet->samples);
    free(dataSet);
}
2 changes: 1 addition & 1 deletion src/fileReader.c → src/file.c
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#include "../lib/fileReader.h"
#include "../lib/file.h"

/**
* Opens file as readonly, checks for errors, exit if any errors, then returns
Expand Down
45 changes: 45 additions & 0 deletions src/graph.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#include "../lib/graph.h"

int compareDistanceSamples(const void* a, const void* b) {
if (((distanceSample_t*)a)->distance < ((distanceSample_t*)b)->distance) return -1;
if (((distanceSample_t*)a)->distance > ((distanceSample_t*)b)->distance) return 1;
return 0;
}

/**
 * Groups the samples behind a distance set into groupsNumber clusters with Kruskal's algorithm.
 *
 * The records of dataSet are sorted by ascending distance IN PLACE, then pairs are joined
 * shortest-first until only groupsNumber groups remain.
 *
 * @param dataSet      pairwise distances; its record order is changed by this call
 * @param groupsNumber number of groups to stop at; if it is 0 or exceeds the number of
 *                     samples, the walk simply ends when the pairs run out
 * @return the union-find structure describing the groups; release it with UF_destroy
 */
union_t* kruskal(distanceDataSet_t* dataSet, size_t groupsNumber) {
    // Kruskal needs the edges sorted by weight.
    qsort(dataSet->samples, dataSet->nElements, sizeof(distanceSample_t), &compareDistanceSamples);

    union_t* un = UF_init(dataSet->depth, dataSet->samples);
    size_t currentGroups = dataSet->depth;

    // Stopping once groupsNumber groups are left is the same as building the whole spanning
    // tree and then cutting its longest edges, so nothing has to be removed afterwards.
    // The index bound keeps the walk inside the array when the target can never be reached.
    for (size_t i = 0; i < dataSet->nElements && currentGroups > groupsNumber; i++) {
        const size_t p = dataSet->samples[i].from->index;
        const size_t q = dataSet->samples[i].to->index;
        // UF_union reports 1 when the two samples were in different groups and got merged.
        if (UF_union(un, p, q) == 1) {
            currentGroups--;
        }
    }

    return un;
}

// Placeholder: not implemented yet. Ignores its argument and always returns NULL.
sample_t** sortMST(union_t* MST) {
    (void)MST;
    return NULL;
}
33 changes: 20 additions & 13 deletions src/main.c
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
#include "../lib/data.h"
#include "../lib/distances.h"
#include "../lib/graph.h"

#define SEPARATOR ","

int main(int argc, char** argv) {
char* filename = argv[1];
// size_t K = strtoul(argv[2], NULL, 10);
size_t K = strtoul(argv[2], NULL, 10);
FILE* file = openFile(filename);
// size_t tokenAmount = getLineSize(file, ",", buffer, &bufferSize);
// char** tokens = readLine(file, ",", buffer, &bufferSize, &tokenAmount);
Expand All @@ -14,23 +15,29 @@ int main(int argc, char** argv) {
// }
// puts("");

dataSet_t* data = loadData(file, SEPARATOR);
dataSet_t* dataPlot = loadData(file, SEPARATOR);
closeFile(file);

puts("Loaded data:");
printDataSet(data);
puts("Loaded dataPlot:");
printDataSet(dataPlot);

// data_t* distances = getDistances(data);
// puts("Distances matrix:");
// printDistanceMatrix(distances);
// FIXME printing B: in in on first line
distanceDataSet_t* distanceSet = calculateDistances(dataPlot);
puts("Loaded distanceSet:");
printDistanceSet(distanceSet);

// dataVector_t* dataVec = vectorizeData(distances, data);
// puts("Vectorized data:");
// printVectorizedData(dataVec);
union_t * MST = kruskal(distanceSet, K);

// kruskal(dataVec, K, data);

destroyDataSet(data);
// Freeing ids here...
for (size_t i = 0; i < dataPlot->nElements; i++) {
free(dataPlot->samples[i].id);
}
// ...because these doesn't free ID's, they reuse the same address
destroyDataSet(dataPlot);
destroyDistanceDataSet(distanceSet);
UF_destroy(MST);


// char* outputFile = argv[3];
}
Loading

0 comments on commit a80183b

Please sign in to comment.