#include #include #include #include #include #include #include /* Create a simple sorting application that uses the mergesort algorithm to sort a large collection (e.g., 10^7 ) of 32-bit integers. The input data and output results should be stored in files, and the I/O operations should be considered a sequential part of the application. Mergesort is an algorithm that is considered appropriate for parallel execution, although it cannot be equally divided between an arbitrary number of processors, as Amdahl’s and Gustafson-Barsis’ laws require. Assuming that this equal division is possible, estimate α, i.e., the part of the program that can be parallelized, by using a profiler like gprof or valgrind to measure the duration of sort’s execution relative to the overall execution time. Use this number to estimate the predicted speedup for your program. Does α depend on the size of the input? If it does, how should you modify your predictions and their graphical illustration? */ template auto parse_file(std::ifstream &stream, std::vector &vec) -> void { std::string buf; T convbuf; while (std::getline(stream, buf)) { convbuf = static_cast(std::stoul(buf)); vec.emplace_back(std::move(convbuf)); } } auto main(int argc, char *argv[]) -> int { try { const auto path = "dataset.dat"; std::ifstream file(path, std::ios_base::in); if (!file.is_open()) { fmt::print("\nError opening file"); return -1; } fmt::print("\nOpened file {} sucessfully", path); std::vector dataset; parse_file(file, dataset); fmt::print("\nRead {} values from {}", dataset.size(), path); auto dataset_par = dataset; auto dataset_seq = dataset; auto t1 = std::chrono::high_resolution_clock::now(); MergeSorterMT msst([](int32_t a, int32_t b) { return (a > b); }, 0); msst.sort(dataset_seq); auto t2 = std::chrono::high_resolution_clock::now(); auto t_seq = std::chrono::duration_cast(t2 - t1); fmt::print("\nSorted {} entries within {} ms in sequential", dataset_seq.size(), t_seq.count()); const int threads = std::thread::hardware_concurrency(); const int max_depth = std::log(threads); t1 = std::chrono::high_resolution_clock::now(); MergeSorterMT msmt([](int32_t a, int32_t b) { return (a > b); }, max_depth); msmt.sort(dataset_par); t2 = std::chrono::high_resolution_clock::now(); auto t_par = std::chrono::duration_cast(t2 - t1); fmt::print("\nSorted {} entries within {} ms in parallel on a system having {} threads and a recursion depth of {}" "\nresulting in a total count of {} threads", dataset_seq.size(), t_par.count(), threads, max_depth, std::pow(2, max_depth + 1)); auto eq = (dataset_seq == dataset_par); fmt::print("\nCheck whether sorted arrays are equal: {}", (eq) ? "Equal" : "not equal"); fmt::print("\n\n------------Summary------------"); fmt::print("\nt_seq = {: > 5.2f} ms", static_cast(t_seq.count())); fmt::print("\nt_par = {: > 5.2f} ms", static_cast(t_par.count())); fmt::print("\nspeedup = {: > 5.2f}", (1.0 * t_seq / t_par)); fmt::print("\nDelta_t = {: > 5.2f} ms", static_cast(t_seq.count() - t_par.count())); fmt::print("\n-------------------------------"); std::ofstream ofile("dataset.out.dat", std::ios_base::out); if (!ofile.is_open()) { fmt::print("\nError writing to file"); return -1; } for (auto &element: dataset_seq) { ofile << std::to_string(element) << '\n'; } file.close(); ofile.flush(); ofile.close(); fmt::print("\nWritten to output file"); return 0; } catch (std::exception &e) { fmt::print("\nError occured: {}", e.what()); return -1; } }