diff --git a/CMakeLists.txt b/CMakeLists.txt deleted file mode 100644 index 896abec..0000000 --- a/CMakeLists.txt +++ /dev/null @@ -1,22 +0,0 @@ -cmake_minimum_required(VERSION 3.14) - -project( - ift630_sts3 - VERSION 0.1.0 - DESCRIPTION "bs project to learn openMPI / openMP" - LANGUAGES C -) - -set(src - src/main.c - ) - -set(CMAKE_DEBUG_POSTFIX d) -add_executable(ift630_sts3 ${src}) - -find_package(OpenMP) #make it REQUIRED, if you want -include_directories(SYSTEM ${OpenMP_INCLUDE_PATH}) -target_link_libraries(ift630_sts3 ${OpenMP_C_LIBRARIES}) - -set_target_properties(ift630_sts3 PROPERTIES DEBUG_POSTFIX ${CMAKE_DEBUG_POSTFIX}) -target_compile_features(ift630_sts3 PRIVATE c_std_99) diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..4190ab7 --- /dev/null +++ b/Makefile @@ -0,0 +1,32 @@ +.PHONY: all omp mpi runmpi runomp clean + +SRCMPI = src/mpi.c +SRCOMP = src/openmp.c +OJB = $(SRC:.c=.o) +OUT = build + +CC = /usr/bin/gcc +MPICC = /usr/bin/mpicc +MPIRUN = /usr/bin/mpirun +CFLAGS = -ansi -Wall -std=c99 -O3 +OMP = -fopenmp +RM = /bin/rm -fr + +# `all` must depend on the real `omp` rule: there is no `openmp` target +all: mpi omp + cp ./stop_times.txt build + +runmpi: mpi + cd build ; $(MPIRUN) -np 8 ./mpi + +runomp: omp + cd build ; ./omp + +mpi: + $(MPICC) $(SRCMPI) $(CFLAGS) -o $(OUT)/mpi + +omp: + $(CC) $(SRCOMP) $(OMP) -o $(OUT)/omp + +clean: + $(RM) $(OUT)/* diff --git a/README.md b/README.md index 8a92ccd..9bbea72 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,3 @@ # ift630_sts3 +[see typst doc](./main.pdf) diff --git a/logo.png b/logo.png new file mode 100644 index 0000000..6a6f919 Binary files /dev/null and b/logo.png differ diff --git a/main.pdf b/main.pdf new file mode 100644 index 0000000..3c9b380 Binary files /dev/null and b/main.pdf differ diff --git a/main.typ b/main.typ new file mode 100644 index 0000000..c79c79f --- /dev/null +++ b/main.typ @@ -0,0 +1,76 @@ +#set page( + numbering: "1 / 1", + header: [ + #set text(8pt) + _IFT630 #h(1fr) Violette Paulin_ + ], +) + +#let title(content) = { +
pagebreak(weak:true) + set text(size:17pt, weight: "bold") + set align(center) + v(70pt) + [#content] + v(50pt) +} + +#set par( + first-line-indent: 1em, + justify: true, +) + +#title[ + IFT630 - Projet #3 +] + +#show outline.entry.where( + level: 1 +): it => { + v(14pt, weak: false) + strong(it) +} + + +#v(20pt) + +#image("logo.png") + +#align(center)[ + #text(size: 15pt)[ + Violette PAULIN – PAUM1202\ + _Violette.Paulin\@USherbrooke.ca_\ \ + ] +] + +#v(20pt) + +#pagebreak() += Build et test +Une fois le dossier `build` créé, on peut compiler directement en exécutant +`make all`. Pour tester MPI, `make runmpi`. Pour tester OpenMP, `make runomp`. +Ces commandes créent des fichiers `OutX.txt`. Ceux-ci montrent la clef testée à +gauche, et le temps pour la trouver à droite, séparés par un ':'. + += Performance +Je ne comprends pas la question de mesure de performance. Dans mon cas, mesurer +les performances revient à mesurer la totalité du temps écoulé, ce qui va +comprendre une part non négligeable d'allocation, ainsi que l'overhead des +différentes bibliothèques. Alors que si je mesure le temps moyen pour trouver une +clef, je mesurerais la même chose dans les deux cas. Il m'est donc impossible de +pouvoir tirer une conclusion satisfaisante sur les performances, pour mon +implémentation. J'ai quand même fait le choix de mesurer le temps pour trouver +une clef, sans prendre en compte l'allocation. + +Cependant, OpenMP peut marcher sur tous les cœurs logiques (threads) de ma +machine, alors que MPI ne fonctionne qu'avec les cœurs physiques (il me semble). +Ainsi, on gagne théoriquement en temps global avec OpenMP. + +De plus, je n'ai pas utilisé OpenMP de la manière la plus optimale, faute de +temps. À la place de diviser sur une boucle, je l'ai divisé comme je l'ai fait +pour MPI. Ceci coûte plus de temps en allocation, et l'on est contraint à +utiliser plusieurs fichiers. + +Dans les deux cas, on s'aperçoit que plus une clef est loin dans le fichier, plus elle est longue à trouver. 
+C'est le comportement attendu. De plus, les temps semblent être linéaire, ce
+qui est encore une fois attendu.
diff --git a/src/mpi.c b/src/mpi.c
new file mode 100644
index 0000000..3b0e6da
--- /dev/null
+++ b/src/mpi.c
@@ -0,0 +1,420 @@
+#define _XOPEN_SOURCE 700
+#include <mpi.h>
+
+/*
+ * NOTE(review): the header names were lost in the reviewed copy of this
+ * file; the list below is rebuilt from the symbols the code uses.
+ */
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <time.h>
+
+/* Column indices used to address one parsed record. */
+enum HEADER {
+	TRIP_ID = 0,
+	ARRIVAL_TIME,
+	DEPARTURE_TIME,
+	STOP_ID,
+	STOP_SEQUENCE,
+	STOP_HEADSIGN,
+	PICKUP_TYPE,
+	DROP_OFF_TYPE,
+	SHAPE_DIST_TRAVELED,
+	TIMEPOINT,
+	END
+};
+
+#define DELIM ','
+/* Bytes read past a rank's share so that its last line can be completed. */
+#define OVERLAP 100
+#define STOP_FILE "./stop_times.txt"
+#define NS_PER_SEC 1000000000LL
+
+// adding key here!
+int do_map(char *, size_t, char *, int);
+void do_reduce(size_t);
+int parprocess(MPI_File *, const int, const int, char *);
+
+/* Report `what` on stderr, then abort every rank of the job. */
+static void
+die(const char *what)
+{
+	fprintf(stderr, "%s\n", what);
+	MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
+	exit(EXIT_FAILURE);
+}
+
+/* Nanoseconds between two clock samples, clamped to 0 if negative. */
+static unsigned long
+elapsed_ns(const struct timespec *start, const struct timespec *stop)
+{
+	long long ns = (long long)(stop->tv_sec - start->tv_sec) * NS_PER_SEC +
+	    (stop->tv_nsec - start->tv_nsec);
+	return ns > 0 ? (unsigned long)ns : 0;
+}
+
+/*
+ * With exactly one argument, that argument is the trip id every rank looks
+ * for in its chunk; otherwise each rank times the lookup of all its keys.
+ * Rank 0 then collects one result word per rank.
+ */
+int
+main(int argc, char **argv)
+{
+	// Initialize the MPI environment
+	MPI_Init(NULL, NULL);
+
+	int world_size, world_rank;
+	MPI_Comm_size(MPI_COMM_WORLD, &world_size);
+	MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
+
+	char *key = NULL;
+	if (argc == 2)
+		key = argv[1]; /* argv[2] is the NULL sentinel when argc == 2 */
+	else if (world_rank == 0)
+		printf("no args ; indexing all\n");
+
+	MPI_File in;
+	if (MPI_File_open(MPI_COMM_WORLD, STOP_FILE, MPI_MODE_RDONLY,
+	    MPI_INFO_NULL, &in) != MPI_SUCCESS) {
+		fprintf(stderr, "%s: Couldn't open file %s\n", argv[0],
+		    STOP_FILE);
+		MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
+		exit(EXIT_FAILURE);
+	}
+
+	parprocess(&in, world_rank, world_size, key);
+
+	if (world_rank == 0) {
+		/* every rank, this one included, sent exactly one word */
+		unsigned long best = 0;
+		for (int k = 0; k < world_size; k++) {
+			unsigned long res = 0;
+			MPI_Recv(&res, 1, MPI_UNSIGNED_LONG, MPI_ANY_SOURCE, 0,
+			    MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+			if (res != 0 && (best == 0 || res < best))
+				best = res;
+		}
+		/* in index-all mode the word is only a completion token */
+		if (key != NULL) {
+			if (best != 0)
+				printf("res: found in %lu ns\n", best);
+			else
+				printf("res: not found\n");
+		}
+	}
+
+	MPI_File_close(&in);
+	MPI_Finalize();
+	exit(0);
+}
+
+/* Difference a - b, in seconds, using only the hour/minute/second fields. */
+time_t
+substr_time(struct tm a, struct tm b)
+{
+	return (a.tm_hour * 3600 + a.tm_min * 60 + a.tm_sec) -
+	    (b.tm_hour * 3600 + b.tm_min * 60 + b.tm_sec);
+}
+
+/*
+ * Address of field `y` of record `x` inside the flat `lines` table, where a
+ * record holds `num_attr` slots of `max_attr` bytes each.
+ */
+char *
+get_word(char *lines, size_t x, size_t y, size_t num_attr, size_t max_attr)
+{
+	size_t offset = (x * (num_attr * max_attr) + y * (max_attr));
+	return lines + offset;
+}
+
+/*
+ * Copy every DELIM-terminated field of `chunk` into its slot of `lines`.
+ * `max_attr` is the slot size in bytes, terminating NUL included. Fields
+ * that do not fit the table (extra columns, extra lines, oversized text)
+ * are dropped or truncated instead of being written out of bounds. The last
+ * field of a line has no trailing DELIM and is therefore not stored.
+ */
+void
+fill_lines(char *chunk, size_t num_char, size_t num_lines, size_t num_attr,
+    size_t max_attr, char *lines)
+{
+	size_t attr_pos = 0, line_pos = 0, word_pos = 0;
+
+	if (max_attr == 0)
+		return;
+	for (size_t k = 0; k < num_char; ++k) {
+		if (chunk[k] == DELIM) {
+			if (line_pos < num_lines && attr_pos < num_attr) {
+				size_t len = word_pos < max_attr ?
+				    word_pos : max_attr - 1;
+				char *word = get_word(lines, line_pos, attr_pos,
+				    num_attr, max_attr);
+				memcpy(word, chunk + k - word_pos, len);
+				word[len] = '\0';
+			}
+			// go to next attrib
+			++attr_pos;
+			word_pos = 0;
+		}
+		else if (chunk[k] == '\n') {
+			++line_pos;
+			attr_pos = 0;
+			word_pos = 0;
+		}
+		else if (chunk[k] == '\r') {}
+		else {
+			++word_pos;
+		}
+	}
+}
+
+/*
+ * Scan `chunk` once and accumulate into the caller-initialised outputs:
+ * `*num_lines` grows by the number of lines (a final line without '\n'
+ * counts too) and `*max_attr_size` is raised to the longest field length.
+ * Only '\n' ends a line, as in fill_lines; '\r' is ignored.
+ */
+void
+get_lines_info(char *chunk, size_t num_char, size_t *max_attr_size,
+    size_t *num_lines)
+{
+	size_t current_attr_size = 0;
+	for (size_t k = 0; k < num_char; ++k) {
+		if (chunk[k] == '\r')
+			continue;
+		if (chunk[k] == '\n')
+			++(*num_lines);
+		if (chunk[k] == DELIM || chunk[k] == '\n') {
+			if (current_attr_size > *max_attr_size)
+				*max_attr_size = current_attr_size;
+			current_attr_size = 0;
+		} else
+			++current_attr_size;
+	}
+	if (current_attr_size > *max_attr_size)
+		*max_attr_size = current_attr_size;
+	if (num_char > 0 && chunk[num_char - 1] != '\n')
+		++(*num_lines);
+}
+
+/*
+ * Number of DELIM characters on the first line of `chunk`, i.e. the number
+ * of fields fill_lines stores per record. `chunk` must be NUL-terminated;
+ * the scan stops at the first '\n' or at the terminator.
+ */
+size_t
+get_num_attr(char *chunk)
+{
+	size_t num_attr = 0;
+	for (size_t k = 0; chunk[k] != '\n' && chunk[k] != '\0'; ++k)
+		if (chunk[k] == DELIM)
+			++num_attr;
+	return num_attr;
+}
+
+/* Return 1 if some record has `key` as its TRIP_ID field, 0 otherwise. */
+int
+search_key(char *lines, size_t num_lines, size_t num_attr, size_t max_attr,
+    char *key)
+{
+	if (lines == NULL || key == NULL)
+		return 0;
+	for (size_t k = 0; k < num_lines; ++k) {
+		if (!strcmp(get_word(lines, k, TRIP_ID, num_attr, max_attr),
+		    key))
+			return 1;
+	}
+	return 0;
+}
+
+/*
+ * Largest arrival - departure difference, in seconds, over all records.
+ * Records whose times strptime cannot parse as %H:%M:%S are skipped.
+ */
+int
+get_max_time(char *lines, size_t num_lines, size_t num_attr, size_t max_attr)
+{
+	time_t max_time = 0;
+	for (size_t k = 0; k < num_lines; ++k) {
+		struct tm dep_time, arr_time;
+
+		/* strptime only fills the fields it parses: start from 0 */
+		memset(&dep_time, 0, sizeof(struct tm));
+		memset(&arr_time, 0, sizeof(struct tm));
+
+		if (!strptime(get_word(lines, k, DEPARTURE_TIME, num_attr,
+		    max_attr), "%H:%M:%S", &dep_time))
+			continue;
+		if (!strptime(get_word(lines, k, ARRIVAL_TIME, num_attr,
+		    max_attr), "%H:%M:%S", &arr_time))
+			continue;
+
+		time_t tmp = substr_time(arr_time, dep_time);
+		if (tmp > max_time)
+			max_time = tmp;
+	}
+	return (int)max_time;
+}
+
+/*
+ * Parse `chunk` (num_char bytes, NUL-terminated, heap-allocated) into a
+ * table and time trip-id lookups in it. With key == NULL every record's
+ * trip id is looked up and a "trip_id:nanoseconds" line goes to Out%d.txt;
+ * otherwise only `key` is looked up. Exactly one word is always sent to
+ * rank 0, which waits for it: the lookup time in ns (at least 1) when `key`
+ * was found, 0 when it was not, the last measured time in index-all mode.
+ * Takes ownership of `chunk`. Returns 1 if a lookup succeeded, else 0.
+ *
+ * NOTE(review): rank 0 sends to itself before posting its receive, which
+ * relies on the MPI library buffering a one-word message.
+ */
+int
+do_map(char *chunk, size_t num_char, char *key, int rank)
+{
+	size_t num_lines = 0, num_attr = 0;
+	size_t max_attr_size = 0;
+	unsigned long elapsed = 0;
+	int res = 0;
+	char *lines = NULL;
+
+	if (chunk != NULL && num_char > 0) {
+		get_lines_info(chunk, num_char, &max_attr_size, &num_lines);
+		num_attr = get_num_attr(chunk);
+	}
+
+	/* one extra byte per slot so that a longest field keeps its NUL */
+	const size_t slot_size = max_attr_size + 1;
+
+	// allocate lines (just a big continuous chunk)
+	// is a 2d arr of char*
+	if (num_lines > 0 && num_attr > 0) {
+		lines = calloc(num_lines * num_attr, slot_size);
+		if (lines == NULL)
+			die("do_map: out of memory");
+		fill_lines(chunk, num_char, num_lines, num_attr, slot_size,
+		    lines);
+	} else
+		num_lines = 0; /* nothing usable: keep the loops empty */
+
+	struct timespec start_time, stop_time;
+
+	// test all ; print all
+	if (key == NULL) {
+		char file_name[32];
+
+		snprintf(file_name, sizeof(file_name), "Out%d.txt", rank);
+		FILE *file = fopen(file_name, "w");
+		if (file == NULL)
+			perror(file_name);
+		for (size_t k = 0; file != NULL && k < num_lines; ++k) {
+			char *trip_name = get_word(lines, k, TRIP_ID, num_attr,
+			    slot_size);
+			if (trip_name[0] == '\0')
+				continue; /* blank line: no key to time */
+			clock_gettime(CLOCK_MONOTONIC, &start_time);
+			int hit = search_key(lines, num_lines, num_attr,
+			    slot_size, trip_name);
+			clock_gettime(CLOCK_MONOTONIC, &stop_time);
+
+			if (!hit)
+				continue; // dont print if err
+			res = 1;
+			elapsed = elapsed_ns(&start_time, &stop_time);
+			fprintf(file, "%s:%lu\n", trip_name, elapsed);
+		}
+		if (file != NULL)
+			fclose(file);
+	} else { // search for key
+		clock_gettime(CLOCK_MONOTONIC, &start_time);
+		res = search_key(lines, num_lines, num_attr, slot_size, key);
+		clock_gettime(CLOCK_MONOTONIC, &stop_time);
+
+		if (res) {
+			elapsed = elapsed_ns(&start_time, &stop_time);
+			if (elapsed == 0)
+				elapsed = 1; /* 0 means "not found" to rank 0 */
+		}
+	}
+	// just so we dont lock
+	MPI_Send(&elapsed, 1, MPI_UNSIGNED_LONG, 0, 0, MPI_COMM_WORLD);
+
+	free(lines);
+	free(chunk);
+	return res;
+}
+
+// help from https://stackoverflow.com/questions/12939279/mpi-reading-from-a-text-file
+// is hosted under a permissive licence, ty Jonathan Dursi :)
+/*
+ * Read this rank's share of the file plus OVERLAP bytes, trim it to whole
+ * lines and hand it to do_map. A rank owns the lines that start after its
+ * first byte, up to and including the line holding the next rank's first
+ * byte (rank 0 also owns the very first line), so no line is split, lost
+ * or processed twice as long as lines are shorter than OVERLAP.
+ * Returns the value of do_map.
+ */
+int
+parprocess(MPI_File *in, const int rank, const int size, char *key)
+{
+	MPI_Offset filesize = 0;
+
+	if (MPI_File_get_size(*in, &filesize) != MPI_SUCCESS)
+		die("parprocess: cannot get the file size");
+
+	const MPI_Offset proc_size = filesize / size;
+	const MPI_Offset globalstart = rank * proc_size;
+	/* first byte after this rank's share, then after what it reads */
+	MPI_Offset share_end = globalstart + proc_size;
+	if (rank == size - 1)
+		share_end = filesize;
+	MPI_Offset read_end = share_end;
+	if (rank != size - 1)
+		read_end += OVERLAP;
+	if (read_end > filesize)
+		read_end = filesize;
+
+	const MPI_Offset total_size = read_end - globalstart;
+	if (total_size > INT_MAX)
+		die("parprocess: chunk too large for a single MPI read");
+
+	/* allocate memory, filled with 0 ; +1 keeps the chunk NUL-terminated */
+	char *chunk = calloc((size_t)total_size + 1, 1);
+	if (chunk == NULL)
+		die("parprocess: out of memory");
+
+	MPI_Status status;
+	int got = 0;
+	if (MPI_File_read_at_all(*in, globalstart, chunk, (int)total_size,
+	    MPI_CHAR, &status) != MPI_SUCCESS ||
+	    MPI_Get_count(&status, MPI_CHAR, &got) != MPI_SUCCESS ||
+	    got == MPI_UNDEFINED)
+		die("parprocess: cannot read the file");
+
+	size_t first = 0, last = (size_t)got;
+	if (rank != 0) {
+		/* the line holding our first byte belongs to the previous rank */
+		while (first < last && chunk[first] != '\n')
+			++first;
+		if (first < last)
+			++first;
+	}
+	if (rank != size - 1) {
+		/* finish the line that holds the next rank's first byte */
+		size_t stop = (size_t)proc_size;
+		while (stop < last && chunk[stop] != '\n')
+			++stop;
+		if (stop < last)
+			last = stop + 1;
+	}
+
+	const size_t length = last > first ? last - first : 0;
+	if (first > 0 && length > 0)
+		memmove(chunk, chunk + first, length);
+	chunk[length] = '\0';
+
+	return do_map(chunk, length, key, rank);
+}
diff --git a/src/main.c b/src/openmp.c
similarity index 96%
rename from src/main.c
rename to src/openmp.c
index a768773..d104d5a 100644
--- a/src/main.c
+++ b/src/openmp.c
@@ -26,7 +26,7 @@ enum HEADER {
 #define STOP_FILE "./stop_times.txt"
 
 // adding key here!
-int do_map(char *, size_t); +int do_map(char *, size_t, int); void do_reduce(size_t); int parprocess(char *, size_t); @@ -169,7 +169,7 @@ get_max_time(char *lines, size_t num_lines, size_t num_attr, size_t max_attr) } int -do_map(char *chunk, size_t num_char) +do_map(char *chunk, size_t num_char, int rank) { size_t num_lines = 0, num_attr = 0; size_t max_attr_size = 0; @@ -188,7 +188,9 @@ do_map(char *chunk, size_t num_char) struct timespec start_time, stop_time; // test all ; print all - char file_name[] = "Out.txt"; + char file_name[] = "OutXX.txt"; + sprintf(file_name, "Out%d.txt", rank); + FILE *file = fopen(file_name, "w"); for (size_t k = 0; k < num_lines; ++k) { @@ -255,7 +257,7 @@ parprocess(char *buff, size_t file_size) memcpy(chunk, buff + start, proc_size); ssize_t err; - int max = do_map(chunk, total_size); + int max = do_map(chunk, total_size, rank); } return 0; }