makefile + pdf

2024-04-10 23:29:23 -04:00 · 2024-04-10 23:29:23 -04:00 · 227b5053b4
commit 227b5053b4
parent c3ba8068d7
8 changed files with 454 additions and 26 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -1,22 +0,0 @@
-cmake_minimum_required(VERSION 3.14)
-
-project(
-    ift630_sts3
-    VERSION 0.1.0
-	DESCRIPTION "bs project to learn openMPI / openMP"
-    LANGUAGES C
-)
-
-set(src
-	src/main.c
-	)
-
-set(CMAKE_DEBUG_POSTFIX d)
-add_executable(ift630_sts3 ${src})
-
-find_package(OpenMP) #make it REQUIRED, if you want
-include_directories(SYSTEM ${OpenMP_INCLUDE_PATH})
-target_link_libraries(ift630_sts3 ${OpenMP_C_LIBRARIES})
-
-set_target_properties(ift630_sts3 PROPERTIES DEBUG_POSTFIX ${CMAKE_DEBUG_POSTFIX})
-target_compile_features(ift630_sts3 PRIVATE c_std_99)
--- a/31
+++ b/31
@ -0,0 +1,31 @@
+.PHONY: all omp mpi runmpi runomp clean
+
+# Sources and output directory.
+SRCMPI 	= src/mpi.c
+SRCOMP 	= src/openmp.c
+OUT 	= build
+
+CC 		= /usr/bin/gcc
+MPICC	= /usr/bin/mpicc
+MPIRUN	= /usr/bin/mpirun
+# -ansi (C90) was dropped: it contradicted -std=c99, which already won.
+CFLAGS 	= -Wall -std=c99 -O3
+OMP 	= -fopenmp
+RM 		= /bin/rm -fr 
+
+# Build both binaries, then copy the data file next to them.
+# The second prerequisite used to be "openmp", which has no rule.
+all: mpi omp
+	cp ./stop_times.txt $(OUT)
+
+runmpi: mpi
+	cd $(OUT) ; $(MPIRUN) -np 8 ./mpi
+
+runomp: omp
+	cd $(OUT) ; ./omp
+
+mpi:
+	mkdir -p $(OUT)
+	$(MPICC) $(SRCMPI) $(CFLAGS) -o $(OUT)/mpi
+
+omp:
+	mkdir -p $(OUT)
+	$(CC) $(SRCOMP) $(OMP) -o $(OUT)/omp
+
+clean:
+	$(RM) $(OUT)/*
--- a/README.md
+++ b/README.md
@ -1,2 +1,3 @@
 # ift630_sts3

+[see typst doc](./main.pdf)
--- a/logo.png
+++ b/logo.png
--- a/main.pdf
+++ b/main.pdf
--- a/main.typ
+++ b/main.typ
@ -0,0 +1,76 @@
+#set page(
+	numbering: "1 / 1",
+	header: [
+		#set text(8pt)
+		_IFT630 #h(1fr) Violette Paulin_
+	],
+)
+
+// Renders `content` as a title block: a weak page break, then centered bold
+// 17pt text with 70pt of vertical space above it and 50pt below it.
+#let title(content) = {
+	pagebreak(weak:true)
+	set text(size:17pt, weight: "bold")
+	set align(center)
+	v(70pt)
+	[#content]
+	v(50pt)
+}
+
+#set par(
+  first-line-indent: 1em,
+  justify: true,
+)
+
+#title[
+  IFT630 - Projet #3
+]
+
+#show outline.entry.where(
+  level: 1
+): it => {
+  v(14pt, weak: false)
+  strong(it)
+}
+
+
+#v(20pt)
+
+#image("logo.png")
+
+#align(center)[
+  #text(size: 15pt)[
+    Violette PAULIN – PAUM1202\
+    _Violette.Paulin\@USherbrooke.ca_\ \ 
+  ]
+]
+
+#v(20pt)
+
+#pagebreak()
+= Build et test
+Une fois le dossier `build` créé, on peut compiler directement en exécutant
+`make all`. Pour tester MPI, `make runmpi`. Pour tester OpenMP, `make runomp`.
+Ces commandes créent des fichiers `OutX.txt`. Ceux-ci montrent la clef testée à
+gauche et le temps pour la trouver à droite, séparés par un ':'.
+
+= Performance
+Je ne comprends pas la question de mesure de performance. Dans mon cas, mesurer
+les performances revient à mesurer la totalité du temps écoulé, ce qui va
+comprendre une part non négligeable d'allocation, ainsi que l'overhead des
+différentes librairies. Alors que si je mesure le temps moyen pour trouver une
+clef, je mesurerais la même chose dans les deux cas. Il m'est donc impossible de
+pouvoir tirer une conclusion satisfaisante sur les performances, pour mon
+implémentation. J'ai quand même fait le choix de mesurer le temps pour trouver
+une clef, sans prendre en compte l'allocation.
+
+Cependant, OpenMP peut utiliser tous les cœurs logiques (threads) de ma
+machine, alors que MPI ne fonctionne qu'avec les cœurs physiques (il me semble).
+Ainsi, on gagne théoriquement en temps global avec OpenMP.
+
+De plus, je n'ai pas utilisé OpenMP de la manière la plus optimale, faute de
+temps. Au lieu de répartir le travail sur une boucle, je l'ai divisé comme je
+l'ai fait pour MPI. Ceci coûte plus de temps en allocation, et l'on est
+contraint d'utiliser plusieurs fichiers.
+
+Dans les deux cas, on s'aperçoit que plus une clef est loin dans le fichier,
+plus elle est longue à trouver. C'est le comportement attendu. De plus, les
+temps semblent être linéaires, ce qui est encore une fois attendu.
--- a/src/mpi.c
+++ b/src/mpi.c
@ -0,0 +1,340 @@
+#define _XOPEN_SOURCE 700
+#include <sys/types.h>
+
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+// Attribute (column) indices passed to get_word() to select a field of a
+// parsed line; END is one past the last index.
+// NOTE(review): names presumably mirror the column order of stop_times.txt —
+// confirm against the data file.
+enum HEADER {
+	TRIP_ID = 0,
+	ARRIVAL_TIME,
+	DEPARTURE_TIME,
+	STOP_ID,
+	STOP_SEQUENCE,
+	STOP_HEADSIGN,
+	PICKUP_TYPE,
+	DROP_OFF_TYPE,
+	SHAPE_DIST_TRAVELED,
+	TIMEPOINT,
+	END
+};
+
+#define DELIM ','
+#define OVERLAP 100
+#define STOP_FILE "./stop_times.txt"
+
+// adding key here!
+int		do_map(char *, size_t, char *, int);
+void	do_reduce(size_t);
+int		parprocess(MPI_File *, const int, const int, char *);
+
+/*
+ * Entry point. Every rank opens STOP_FILE, processes its share through
+ * parprocess() and reports one unsigned long to rank 0, which collects one
+ * message per rank.
+ *
+ * Usage: mpi [key]
+ *   With exactly one argument, that argument is the key to search for.
+ *   Otherwise every key is indexed (key == NULL).
+ */
+int
+main(int argc, char **argv)
+{
+	// Initialize the MPI environment
+	MPI_Init(NULL, NULL);
+
+	int world_size, world_rank;
+	MPI_Comm_size(MPI_COMM_WORLD, &world_size);
+	MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
+
+	// argv[1] is the key; argv[2] is always NULL when argc == 2.
+	char *key = NULL;
+	if (argc == 2)
+		key = argv[1];
+	else if (world_rank == 0)
+		printf("no args ; indexing all\n");
+
+	MPI_File in;
+	int err = MPI_File_open(MPI_COMM_WORLD, STOP_FILE, MPI_MODE_RDONLY,
+	    MPI_INFO_NULL, &in);
+	if (err != MPI_SUCCESS) {
+		fprintf(stderr, "%s: Couldn't open file %s\n", argv[0],
+		    STOP_FILE);
+		// Abort the whole job instead of exiting with MPI still active.
+		MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
+	}
+
+	// The per-rank result reaches rank 0 through MPI messages, so the
+	// return value is not needed here.
+	(void)parprocess(&in, world_rank, world_size, key);
+
+	if (world_rank == 0) {
+		// One message is expected from every rank, rank 0 included.
+		for (int k = 0; k < world_size; k++) {
+			unsigned long res = 0;
+			MPI_Recv(&res, 1, MPI_UNSIGNED_LONG, MPI_ANY_SOURCE, 0,
+			    MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+			if (!res)
+				printf("res: not found\n");
+		}
+	}
+
+	MPI_File_close(&in);
+	MPI_Finalize();
+	return 0;
+}
+
+/*
+ * Difference a - b between two times of day, in seconds.
+ * Only the tm_hour, tm_min and tm_sec fields are read.
+ */
+time_t
+substr_time(struct tm a, struct tm b)
+{
+	const int secs_a = a.tm_hour * 3600 + a.tm_min * 60 + a.tm_sec;
+	const int secs_b = b.tm_hour * 3600 + b.tm_min * 60 + b.tm_sec;
+
+	return secs_a - secs_b;
+}
+
+/*
+ * Address of the slot holding attribute y of line x inside the flat buffer
+ * `lines`, where each line is num_attr consecutive slots of max_attr bytes.
+ */
+char *
+get_word(char *lines, size_t x, size_t y, size_t num_attr, size_t max_attr)
+{
+	const size_t line_stride = num_attr * max_attr;
+
+	return &lines[x * line_stride + y * max_attr];
+}
+
+/*
+ * Splits the raw CSV text in `chunk` (num_char bytes) into the flat table
+ * `lines`: num_lines rows of num_attr slots, max_attr bytes per slot.
+ *
+ * A field is stored when the DELIM that ends it is reached, so the last field
+ * of each line (ended by the newline, not by DELIM) is not stored.
+ *
+ * Every write stays inside the table:
+ *   - parsing stops once num_lines rows have been filled;
+ *   - fields beyond num_attr on a line are skipped;
+ *   - a field longer than max_attr - 1 bytes is truncated so that its NUL
+ *     terminator fits in its own slot.
+ */
+void
+fill_lines(char *chunk, size_t num_char, size_t num_lines, size_t num_attr,
+	size_t max_attr, char *lines)
+{
+	if (num_lines == 0 || num_attr == 0 || max_attr == 0)
+		return;
+
+	size_t attr_pos = 0, line_pos = 0, word_pos = 0;
+	for (size_t k = 0; k < num_char; ++k) {
+		const char c = chunk[k];
+
+		if (c == DELIM) {
+			if (attr_pos < num_attr) {
+				size_t len = word_pos;
+				if (len > max_attr - 1)
+					len = max_attr - 1;
+				char *word = get_word(lines, line_pos, attr_pos,
+					num_attr, max_attr);
+				// the field is the word_pos bytes just before DELIM
+				memcpy(word, chunk + k - word_pos, len);
+				word[len] = '\0';
+			}
+			// go to next attrib
+			++attr_pos;
+			word_pos = 0;
+		}
+		else if (c == '\n') {
+			// a row past num_lines would fall outside the table
+			if (++line_pos >= num_lines)
+				break;
+			attr_pos = 0;
+			word_pos = 0;
+		}
+		else if (c != '\r') {
+			// '\r' is ignored so CRLF files parse like LF files
+			++word_pos;
+		}
+	}
+}
+
+/*
+ * Scans `chunk` (num_char bytes) and accumulates into the caller-initialized
+ * outputs:
+ *   *num_lines     += number of lines: one per '\n', plus one for a trailing
+ *                     line that is not newline-terminated. '\r' is no longer
+ *                     counted, so CRLF files are not counted twice.
+ *   *max_attr_size  = max(*max_attr_size, longest run of bytes between two
+ *                     DELIM characters).
+ *
+ * The run length is not reset at line ends, so the last field of a line, its
+ * line terminator and the first field of the next line count as one run. The
+ * result is therefore an upper bound on the real field length.
+ */
+void
+get_lines_info(char *chunk, size_t num_char, size_t *max_attr_size,
+	size_t *num_lines)
+{
+	size_t current_attr_size = 0;
+	for (size_t k = 0; k < num_char; ++k) {
+		// LINE
+		if (chunk[k] == '\n')
+			++(*num_lines);
+		// ATTRIBUTES
+		if (chunk[k] == DELIM) {
+			if (current_attr_size > *max_attr_size)
+				*max_attr_size = current_attr_size;
+			current_attr_size = 0;
+		} else
+			++current_attr_size;
+	}
+	// a trailing partial line still needs a row of its own
+	if (num_char > 0 && chunk[num_char - 1] != '\n')
+		++(*num_lines);
+}
+
+// Counts the DELIM characters on the first line of `chunk`, i.e. up to the
+// first '\n'. The scan also stops at a NUL byte, so a NUL-terminated buffer
+// without any newline no longer sends it past the end.
+// NOTE: no length is passed, so the buffer must contain a '\n' or a '\0'.
+// If the chunk starts in the middle of a line, the count covers only the
+// remainder of that line.
+size_t
+get_num_attr(char *chunk)
+{
+	size_t num_attr = 0;
+	for (size_t k = 0; chunk[k] != '\n' && chunk[k] != '\0'; ++k)
+		if (chunk[k] == DELIM)
+			++num_attr;
+	return num_attr;
+}
+
+/*
+ * Linear search over the TRIP_ID slot of each of the num_lines rows of
+ * `lines`. Returns 1 as soon as a slot equals `key`, 0 if none does.
+ */
+int
+search_key(char *lines, size_t num_lines, size_t num_attr, size_t max_attr,
+    char *key)
+{
+	size_t row = 0;
+
+	while (row < num_lines) {
+		const char *trip_id = get_word(lines, row, TRIP_ID, num_attr,
+		    max_attr);
+		if (strcmp(trip_id, key) == 0)
+			return 1;
+		++row;
+	}
+	return 0;
+}
+
+/*
+ * Returns the largest (arrival - departure) difference, in seconds, over the
+ * rows of `lines`, or 0 when no row has a positive difference.
+ *
+ * Both fields are parsed as "%H:%M:%S". Rows where either field fails to
+ * parse are skipped instead of being compared with indeterminate values.
+ * NOTE(review): "%H" rejects hours above 23 — confirm the data never uses
+ * such values.
+ */
+int
+get_max_time(char *lines, size_t num_lines, size_t num_attr, size_t max_attr)
+{
+	time_t max_time = 0;
+	for (size_t k = 0; k < num_lines; ++k) {
+		// Zero-initialize. The previous memcpy of sizeof(struct tm)
+		// bytes out of a text slot read past the end of the slot.
+		struct tm dep_time = {0}, arr_time = {0};
+		const char *dep = get_word(lines, k, DEPARTURE_TIME, num_attr,
+		    max_attr);
+		const char *arr = get_word(lines, k, ARRIVAL_TIME, num_attr,
+		    max_attr);
+
+		if (strptime(dep, "%H:%M:%S", &dep_time) == NULL ||
+		    strptime(arr, "%H:%M:%S", &arr_time) == NULL)
+			continue;
+
+		time_t tmp = substr_time(arr_time, dep_time);
+		if (tmp > max_time)
+			max_time = tmp;
+	}
+	return (int)max_time;
+}
+
+// Largest number of DELIM characters found on any single line of the chunk.
+// Unlike inspecting the first line only, this stays correct when the chunk
+// begins in the middle of a line.
+static size_t
+count_max_delims(const char *chunk, size_t num_char)
+{
+	size_t current = 0, max = 0;
+	for (size_t k = 0; k < num_char; ++k) {
+		if (chunk[k] == DELIM)
+			++current;
+		else if (chunk[k] == '\n') {
+			if (current > max)
+				max = current;
+			current = 0;
+		}
+	}
+	return current > max ? current : max;
+}
+
+// Nanoseconds elapsed between two clock_gettime() samples.
+static unsigned long
+elapsed_ns(const struct timespec *start, const struct timespec *stop)
+{
+	return (unsigned long)((stop->tv_sec - start->tv_sec) * 1000000000L +
+	    (stop->tv_nsec - start->tv_nsec));
+}
+
+/*
+ * Parses `chunk` (num_char bytes) into a table of fields and times key
+ * lookups on it.
+ *
+ *   key == NULL: every TRIP_ID of the table is looked up in turn and
+ *                "<trip_id>:<nanoseconds>" is written to Out<rank>.txt.
+ *   key != NULL: `key` is looked up once.
+ *
+ * Exactly one MPI_UNSIGNED_LONG (tag 0) is always sent to rank 0: the last
+ * measured lookup time in nanoseconds, or 0 when the key was not found or
+ * nothing could be measured.
+ *
+ * Takes ownership of `chunk` and frees it.
+ * Returns 1 when `key` was found, 0 otherwise (always 0 when key == NULL).
+ */
+int
+do_map(char *chunk, size_t num_char, char *key, int rank)
+{
+	size_t num_lines = 0, max_attr_size = 0;
+	unsigned long elapsed = 0;	// payload sent to rank 0
+	int found = 0;
+
+	get_lines_info(chunk, num_char, &max_attr_size, &num_lines);
+	const size_t num_attr = count_max_delims(chunk, num_char);
+	// one extra byte per slot so every field keeps its NUL terminator
+	const size_t stride = max_attr_size + 1;
+
+	// allocate lines (just a big continuous chunk), used as a 2d array of
+	// fixed-size strings. One spare row absorbs a trailing partial line;
+	// calloc also checks the size multiplication for overflow.
+	char *lines = NULL;
+	if (num_attr > 0) {
+		lines = calloc(num_lines + 1, num_attr * stride);
+		if (lines == NULL)
+			perror("calloc");
+	}
+	if (lines == NULL) {
+		// nothing to search, but rank 0 still waits for our message
+		MPI_Send(&elapsed, 1, MPI_UNSIGNED_LONG, 0, 0, MPI_COMM_WORLD);
+		free(chunk);
+		return 0;
+	}
+
+	fill_lines(chunk, num_char, num_lines, num_attr, stride, lines);
+
+	struct timespec start_time, stop_time;
+
+	if (key == NULL) { // test all ; print all
+		char file_name[32];	// holds "Out<any int>.txt"
+
+		snprintf(file_name, sizeof file_name, "Out%d.txt", rank);
+		FILE *file = fopen(file_name, "w");
+		if (file == NULL)
+			perror(file_name);
+		for (size_t k = 0; file != NULL && k < num_lines; ++k) {
+			char *trip_name = get_word(lines, k, TRIP_ID, num_attr,
+			    stride);
+
+			clock_gettime(CLOCK_MONOTONIC, &start_time);
+			int hit = search_key(lines, num_lines, num_attr,
+			    stride, trip_name);
+			clock_gettime(CLOCK_MONOTONIC, &stop_time);
+
+			if (!hit)
+				continue; // dont print if err
+			elapsed = elapsed_ns(&start_time, &stop_time);
+			fprintf(file, "%s:%lu\n", trip_name, elapsed);
+		}
+		if (file != NULL)
+			fclose(file);
+	} else { // search for key
+		clock_gettime(CLOCK_MONOTONIC, &start_time);
+		found = search_key(lines, num_lines, num_attr, stride, key);
+		clock_gettime(CLOCK_MONOTONIC, &stop_time);
+
+		if (found)
+			elapsed = elapsed_ns(&start_time, &stop_time);
+	}
+
+	// Always report, so rank 0 never blocks waiting for this rank.
+	MPI_Send(&elapsed, 1, MPI_UNSIGNED_LONG, 0, 0, MPI_COMM_WORLD);
+
+	free(lines);
+	free(chunk);
+	return found;
+}
+
+// help from https://stackoverflow.com/questions/12939279/mpi-reading-from-a-text-file
+// is hosted under a permissive licence, ty Jonathan Dursi :)
+
+// Reports a failed step on stderr and aborts the whole MPI job.
+static void
+parprocess_fail(const char *what, int err)
+{
+	fprintf(stderr, "parprocess: %s failed (error %d)\n", what, err);
+	MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
+}
+
+/*
+ * Reads this rank's share of the file and hands it to do_map().
+ *
+ * The file (minus its final byte) is cut into `size` equal byte ranges and
+ * the last rank also takes the remainder. Ranges are cut on raw byte
+ * boundaries and no overlap is read, so a chunk may begin and end in the
+ * middle of a line; at worst that leaves one unusable line per boundary.
+ *
+ * The read is collective, so every rank of the communicator the file was
+ * opened on must call this function. Any failure aborts the job; the
+ * previous code called MPI_Finalize() and carried on with an unread buffer.
+ *
+ * Returns the value returned by do_map(), which takes ownership of the
+ * chunk buffer.
+ */
+int
+parprocess(MPI_File *in, const int rank, const int size, char *key)
+{
+	MPI_Offset filesize;
+	int err = MPI_File_get_size(*in, &filesize);
+	if (err != MPI_SUCCESS)
+		parprocess_fail("MPI_File_get_size", err);
+
+	filesize--; /* get rid of text file eof */
+	if (filesize <= 0)
+		parprocess_fail("reading an empty file", 0);
+
+	const size_t proc_size = filesize / size;
+	const MPI_Offset globalstart = rank * proc_size;
+	MPI_Offset globalend = globalstart + proc_size - 1;
+	if (rank == size - 1)
+		globalend = filesize - 1;
+
+	const size_t total_size = globalend - globalstart + 1;
+
+	// The MPI read count is an int: refuse chunks that do not fit.
+	const int count = (int)total_size;
+	if (count < 0 || (size_t)count != total_size)
+		parprocess_fail("chunk size conversion to int", 0);
+
+	/* allocate memory, filled with 0. The extra byte keeps the buffer
+	 * NUL-terminated and the allocation non-empty. */
+	char *chunk = calloc(total_size + 1, 1);
+	if (chunk == NULL)
+		parprocess_fail("calloc", 0);
+
+	err = MPI_File_read_at_all_begin(*in, globalstart, chunk, count,
+	    MPI_CHAR);
+	if (err != MPI_SUCCESS)
+		parprocess_fail("MPI_File_read_at_all_begin", err);
+
+	err = MPI_File_read_at_all_end(*in, chunk, MPI_STATUS_IGNORE);
+	if (err != MPI_SUCCESS)
+		parprocess_fail("MPI_File_read_at_all_end", err);
+
+	return do_map(chunk, total_size, key, rank);
+}
--- a/src/openmp.c
+++ b/src/openmp.c
@ -26,7 +26,7 @@ enum HEADER {
 #define STOP_FILE "./stop_times.txt"

 // adding key here!
-int		do_map(char *, size_t);
+int		do_map(char *, size_t, int);
 void	do_reduce(size_t);
 int		parprocess(char *, size_t);

@ -169,7 +169,7 @@ get_max_time(char *lines, size_t num_lines, size_t num_attr, size_t max_attr)
 }

 int
-do_map(char *chunk, size_t num_char)
+do_map(char *chunk, size_t num_char, int rank)
 {
 	size_t num_lines = 0, num_attr = 0;
 	size_t max_attr_size = 0;
@ -188,7 +188,9 @@ do_map(char *chunk, size_t num_char)
 	struct timespec start_time, stop_time;

 	// test all ; print all
-		char file_name[] = "Out.txt";
+		char file_name[] = "OutXX.txt";
+		sprintf(file_name, "Out%d.txt", rank);
+		

 		FILE *file = fopen(file_name, "w");
 		for (size_t k = 0; k < num_lines; ++k) {
@ -255,7 +257,7 @@ parprocess(char *buff, size_t file_size)
 		memcpy(chunk, buff + start, proc_size);

 		ssize_t err;
-		int max = do_map(chunk, total_size);
+		int max = do_map(chunk, total_size, rank);
 	}
 	return 0;
 }