Skip to content
This repository has been archived by the owner on Mar 21, 2021. It is now read-only.

Commit

Permalink
Bulletando
Browse files Browse the repository at this point in the history
  • Loading branch information
Henriquelay committed Mar 15, 2021
1 parent 4541920 commit fc67b3d
Show file tree
Hide file tree
Showing 5 changed files with 96 additions and 220 deletions.
48 changes: 22 additions & 26 deletions lib/data.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* This library is for correctly handling the data structure defined for this project.
* This library is for correctly handling the data structures defined for this project.
**/

/**
Expand All @@ -14,34 +14,30 @@
#ifndef _DATA_H_
#define _DATA_H_

#define _GNU_SOURCE // Needed to use qsort_r when not using c11 or gnu89/gnu99
#include <math.h>

#include "./fileReader.h"
#include "./unionFind.h"

// Table of untyped cells plus its dimensions.
typedef struct data_t {
// Indexed as dataMatrix[row][column]. printData reads column 0 as a `char*`
// identifier and every later column through a `long double*`.
void*** dataMatrix;
// i is the row count and j the column count, as iterated by printData and destroyData.
size_t i, j;
} data_t;

// One entry of the flattened list produced by vectorizeData.
typedef struct dataVectorCell_t {
// Points at the cell of the source matrix (vectorizeData stores the cell pointer, not a copy).
long double* distance;
// I and J from the original matrix the vector is pointing to
size_t i;
size_t j;
} dataVectorCell_t;

// Array of distance cells together with its length.
typedef struct dataVector_t {
// Cells; vectorizeData sorts them by ascending distance before storing them here.
dataVectorCell_t* vec;
// Number of cells indexed in vec
size_t size;
} dataVector_t;

data_t* loadData(FILE* file, const char* separator);
void printData(const data_t* dataStruct);
data_t* getDistances(data_t* data);
void destroyData(data_t* data);
dataVector_t* vectorizeData(data_t* data);
union_t* kruskal(dataVector_t* dataVec, size_t K);

// "Rows"
// A single sample: an identifier plus its numeric features.
typedef struct sample {
// Identifier string; loadData stores the first token of the line here and destroySample frees it.
char* id;
// Feature values; loadData allocates nFeatures entries and fills them with strtold.
long double* features;
} sample_t;

// "Lines" of rows
// The loaded input: all samples plus the dimensions they share.
typedef struct dataSet {
// Number of samples; loadData sets it from countLines.
size_t nElements;
// Number of features per sample; loadData sets it from getLineSize.
size_t nFeatures;
// Array of nElements samples, released by destroyDataSet.
sample_t* samples;
} dataSet_t;


dataSet_t* loadData(FILE* file, const char *separator);
void printSample(const sample_t* sample, const size_t* nFeatures);
void printDataSet(dataSet_t* dataSet);
void destroySample(sample_t* sample);
void destroyDataSet(dataSet_t* dataSet);

#endif
4 changes: 2 additions & 2 deletions lib/fileReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
FILE* openFile(const char* fileName);
char closeFile(FILE* file);

size_t getLineSize(FILE* file, const char* separator, char* buffer, size_t* bufferSize);
char** readLine(FILE* file, const char *separator, char* buffer, size_t* bufferSize, const size_t* nFeatures);
size_t getLineSize(FILE* file, const char separator, char* buffer, size_t* bufferSize);
size_t countLines(FILE* file);
void** readLine(FILE* file, const char* separator, char* buffer, size_t* bufferSize, const size_t* tokenAmount);

#endif
188 changes: 41 additions & 147 deletions src/data.c
Original file line number Diff line number Diff line change
@@ -1,176 +1,70 @@
#include "../lib/data.h"

// TODO - Check if a macro to dereference (*((long double*)b[i])) is viable
#define BUFSIZE 1500

/**
 * Writes the data matrix to stdout, one row per line.
 *
 * Each line starts with column 0 printed as a string, followed by every
 * remaining column printed as ",<value>" where the value is read through a
 * long double pointer and shown with 15 decimal places.
 *
 * @param dataStruct matrix to print; must not be NULL
 */
void printData(const data_t* dataStruct) {
    size_t row = 0;
    while (row < dataStruct->i) {
        void** cells = dataStruct->dataMatrix[row];

        // Column 0 holds the identifier string.
        printf("%s", (char*)cells[0]);

        size_t col = 1;
        while (col < dataStruct->j) {
            const long double* value = (long double*)cells[col];
            printf(",%.15Lf", *value);
            col++;
        }

        puts("");
        row++;
    }
}
dataSet_t* loadData(FILE* file, const char* separator) {
char buffer[BUFSIZE];
size_t bufferSize = BUFSIZE;

// TODO check before freeing -- don't check whole array
/**
 * Frees a data_t and everything it owns: every cell of every row, each row
 * array, the row table and finally the struct itself.
 *
 * A NULL `data`, a NULL row table and NULL rows are tolerated. loadData stores
 * whatever readLine returns in each row slot, and readLine returns NULL when
 * no line could be read, so a row slot may be empty.
 *
 * NOTE(review): every non-NULL row is assumed to hold `data->j` cells. The
 * triangular matrix built by getDistances allocates `i` cells for row `i`,
 * which does not appear to satisfy that -- confirm before passing it here.
 *
 * @param data structure to release; may be NULL
 */
void destroyData(data_t* data) {
    if (data == NULL) {
        return;
    }

    if (data->dataMatrix != NULL) {
        for (size_t i = 0; i < data->i; i++) {
            void** row = data->dataMatrix[i];
            if (row == NULL) {
                // Row was never read; nothing to release for it.
                continue;
            }
            for (size_t j = 0; j < data->j; j++) {
                free(row[j]);
            }
            free(row);
        }
    }

    free(data->dataMatrix);
    free(data);
}

data_t* loadData(FILE* file, const char* separator) {
char buffer[5000];
size_t bufferSize = 0;

data_t* dataStruct = (data_t*)malloc(sizeof(data_t));
if (dataStruct == NULL) {
perror("Error allocating new dataStructure. Exiting");
dataSet_t* dataSet = (dataSet_t*)malloc(sizeof(dataSet_t));
if (dataSet == NULL) {
perror("Error allocating new dataSet. Exiting");
exit(1);
};

dataStruct->i = countLines(file);
dataStruct->j = getLineSize(file, separator, buffer, &bufferSize);
dataSet->nFeatures = getLineSize(file, *separator, buffer, &bufferSize);
dataSet->nElements = countLines(file);


// 0 will always be id, read as `char*`.
dataStruct->dataMatrix = (void***)malloc(sizeof(void**) * dataStruct->i);
if (dataStruct->dataMatrix == NULL) {
perror("Error allocating new dataString lines. Exiting");
dataSet->samples = (sample_t*)malloc(sizeof(sample_t) * dataSet->nElements);
if (dataSet->samples == NULL) {
perror("Error allocating new samples. Exiting");
exit(1);
}

// void** line = NULL;
for (size_t i = 0; i < dataStruct->i; i++) {
dataStruct->dataMatrix[i] = readLine(file, separator, buffer, &bufferSize, &dataStruct->j);
}

return dataStruct;
}

long double distance(long double** a, long double** b, size_t dimensions) {
long double accumulator = 0;
for (size_t i = 1; i <= dimensions; i++) {
// printf("dim = %ld Val a = %Lf b = %Lf\n", dimensions, *a[i], *b[i]);
if ((*a[i]) < (*b[i])) {
accumulator += ((*b[i]) - (*a[i])) * ((*b[i]) - (*a[i]));
} else {
accumulator += ((*a[i]) - (*b[i])) * ((*a[i]) - (*b[i]));
}
}
// TODO Try using sqrtl. Defined in <tgmath.h>
return sqrt(accumulator);
}

data_t* getDistances(data_t* data) {
data_t* distances = (data_t*)malloc(sizeof(data_t));
if (distances == NULL) {
perror("Error allocating new distancesure. Exiting");
exit(1);
};

distances->i = data->i;
// Data has [0] as identifiers, won't be using for calculations;
distances->j = data->j - 1;
// 0 will always be id, read as `char*`.
distances->dataMatrix = (void***)malloc(sizeof(long double**) * distances->i);
if (distances->dataMatrix == NULL) {
perror("Error allocating new dataString lines. Exiting");
exit(1);
}


for (size_t i = 0; i < distances->i; i++) {
distances->dataMatrix[i] = (void**)malloc(sizeof(void*) * i);
if (distances->dataMatrix[i] == NULL) {
perror("Error allocating new dataString lines on distances struct. Exiting");
for (size_t i = 0; i < dataSet->nElements; i++) {
char** line = readLine(file, separator, buffer, &bufferSize, &dataSet->nFeatures);
dataSet->samples[i].id = line[0];
dataSet->samples[i].features = (long double*)malloc(sizeof(long double) * dataSet->nFeatures);
if (dataSet->samples == NULL) {
perror("Error allocating features for new sample. Exiting");
exit(1);
}
for (size_t j = 0; j < i; j++) {
distances->dataMatrix[i][j] = (long double*)malloc(sizeof(long double));
if (distances->dataMatrix[i] == NULL) {
perror("Error allocating new dataString lines on distances struct. Exiting");
exit(1);
}
*((long double*)distances->dataMatrix[i][j]) = distance((long double**)data->dataMatrix[i], (long double**)data->dataMatrix[j], distances->j);
for (size_t j = 0; j < dataSet->nFeatures; j++) {
dataSet->samples[i].features[j] = strtold(line[j + 1], NULL);
free(line[j+1]);
}
free(line);
}

return distances;
}

int compareDataVecs(const void* a, const void* b) {
if (*(((dataVectorCell_t*)a)->distance) < *(((dataVectorCell_t*)b)->distance)) return -1;
if (*(((dataVectorCell_t*)a)->distance) > *(((dataVectorCell_t*)b)->distance)) return 1;
return 0;
return dataSet;
}

dataVector_t* vectorizeData(data_t* data) {
// It's a triangle
size_t cells = data->i * (data->i - 1) / 2;
dataVectorCell_t* vector = (dataVectorCell_t*)malloc(sizeof(dataVectorCell_t) * cells);
if (vector == NULL) {
perror("Erro allocating dataVector. Exiting");
exit(1);
void printSample(const sample_t* sample, const size_t* nFeatures) {
printf("%s", sample->id);
for (size_t j = 0; j < *nFeatures; j++) {
printf(",%Lf", sample->features[j]);
}
puts("");
}

size_t k = 0;
for (size_t i = 0; i < data->i; i++) {
for (size_t j = 0; j < i; j++, k++) {
vector[k].distance = data->dataMatrix[i][j];
vector[k].i = i;
vector[k].j = j;
}
}

// Sorting on distances
qsort(vector, cells, sizeof(dataVectorCell_t), &compareDataVecs);

dataVector_t* dataVec = (dataVector_t*)malloc(sizeof(dataVector_t));
if (dataVec == NULL) {
perror("Erro allocating dataVector. Exiting");
exit(1);
void printDataSet(dataSet_t* dataSet) {
for (size_t i = 0; i < dataSet->nElements; i++) {
printSample(&dataSet->samples[i], &dataSet->nFeatures);
}

dataVec->vec = vector;
dataVec->size = k;

// for (size_t i = 0; i < cells; i++) {
// printf("%Lf ", *vector[i].distance);
// }
// puts("");

return dataVec;
}

union_t* kruskal(dataVector_t* dataVec, size_t groupsNumber) {
union_t* un = UF_init(dataVec->size);
size_t currentGroups = dataVec->size;
for (size_t i = 0; currentGroups != groupsNumber; i++, currentGroups--) {

printf("I=%ld if=%d ancestor[i]=%ld ancestor[j]=%ld\n", i, UF_find(un, dataVec->vec[i].i) != UF_find(un, dataVec->vec[i].j), UF_find(un, dataVec->vec[i].i), UF_find(un, dataVec->vec[i].j));
void destroySample(sample_t* sample) {
free(sample->features);
free(sample->id);
}

if (UF_find(un, dataVec->vec[i].i) != UF_find(un, dataVec->vec[i].j)) {
UF_union(un, UF_find(un, dataVec->vec[i].i), UF_find(un, dataVec->vec[i].j));
}
}
puts("\nFinal");
for (size_t i = 0; i < dataVec->size; i++) {
printf("%ld %ld | ", dataVec->vec[i].i, dataVec->vec[i].j);
void destroyDataSet(dataSet_t* dataSet) {
for (size_t i = 0; i < dataSet->nElements; i++) {
destroySample(&dataSet->samples[i]);
}

// puts("\nRemovendo os 3 maiores");

// for(size_t i = un->size - 1; i > un->size - 3; i--) {
// free(un->array);
// free(un->arraySize);
// }
// un->size = un->size - 3;

return un;
free(dataSet->samples);
free(dataSet);
}
54 changes: 17 additions & 37 deletions src/fileReader.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,30 +24,29 @@ char closeFile(FILE* file) {
}


// TODO usar o getline esperamente para não precisar de getLineSize e countLines
// TODO usar o getline espertamente para não precisar de getLineSize e countLines
/**
* Returns how many tokens are in a line of input, including the identifier.
* Returns how many features are in a line of input, not including the identifier.
* Takes a buffer as input to avoid re-allocating memory on every call
* */
size_t getLineSize(FILE* file, const char* separator, char* buffer, size_t* bufferSize) {
size_t charsRead = 0;
size_t getLineSize(FILE* file, const char separator, char* buffer, size_t* bufferSize) {
fpos_t previousPosition; // Stores previous position
fgetpos(file, &previousPosition);

size_t charsRead = 0;
if ((charsRead = getline(&buffer, bufferSize, file)) == -1) {
perror("Error getting new line. Exiting");
exit(1);
}
fsetpos(file, &previousPosition); // Restores previous position

size_t counter = 0;
for (int i = 0; i < charsRead; i++) {
if (buffer[i] == *separator) {
for (size_t i = 0; i < charsRead; i++) {
if (buffer[i] == separator) {
counter++;
}
}
// Buffer is not used, delete it after reading content
free(buffer);
return ++counter;
return counter;
}

/**
Expand All @@ -58,11 +57,8 @@ size_t countLines(FILE* file) {
fgetpos(file, &previousPosition);

size_t lines = 0;
for (char ch = fgetc(file); ch != EOF; ch = fgetc(file)) {
if (ch == '\n') {
lines++;
}
}
for (char ch = fgetc(file); !feof(file); ch = fgetc(file))
if (ch == '\n') lines++;

fsetpos(file, &previousPosition); // Restores previous position
return lines;
Expand All @@ -72,45 +68,29 @@ size_t countLines(FILE* file) {
* Takes an open file, returns an allocated struct with identifier and values
* Takes a buffer as input to avoid re-allocating memory on every call
* */
void** readLine(FILE* file, const char* separator, char* buffer, size_t* bufferSize, const size_t* tokenAmount) {
char** readLine(FILE* file, const char *separator, char* buffer, size_t* bufferSize, const size_t* nFeatures) {
if (getline(&buffer, bufferSize, file) == -1) {
// perror("Error getting new line. Exiting");
// exit(1);
return NULL;
}

void** tokens = (void**)malloc(sizeof(void*) * *tokenAmount);
char** tokens = (char**)malloc(sizeof(char*) * (*nFeatures + 1));
if (tokens == NULL) {
perror("Error allocating tokens array");
exit(1);
}

// Alternativelly, a null pointer may be specified, in which case the function
//continues scanning where a previous successful call to the function ended.
char* token;
unsigned int i = 0;
char* identifier = strtok(buffer, separator);
// Allocates space for identifies cell
tokens[0] = (char*)malloc(sizeof(char) * ((strlen(identifier)) + 1));
if (tokens[0] == NULL) {
perror("Error allocating new dataString cells. Exiting");
exit(1);
}
strcpy(tokens[0], identifier);

char* token = strtok(buffer, separator);
// NOTE - Try removing one of the checks for's second term to gain a bit of time if it works
for (i = 1, token = strtok(NULL, separator); token != NULL && i < *tokenAmount; token = strtok(NULL, separator), i++) {
for (size_t i = 0; token != NULL; token = strtok(NULL, separator), i++) {
// Allocates space for data cell
tokens[i] = (long double*)malloc(sizeof(long double));
tokens[i] = (char*)malloc(sizeof(long double));
if (tokens[i] == NULL) {
perror("Error allocating token cell value");
perror("Error allocating feature for dataPoint. Exiting");
exit(1);
}
*((long double*)tokens[i]) = strtold(token, NULL);
strcpy(tokens[i], token);
}

// Remove trailing '\n'
// size_t lastStringLen = strlen(tokens[--i]);
// tokens[i][lastStringLen - 1] = '\0';
return tokens;
}
Loading

0 comments on commit fc67b3d

Please sign in to comment.