multithreading support

main
Sascha Nitsch 2024-02-26 01:48:36 +01:00
parent f53d989793
commit 20afc8908c
1 changed files with 179 additions and 69 deletions

248
main.cpp
View File

@ -16,9 +16,14 @@
#include <Magick++.h>
#include <string.h>
#include <sys/time.h>
#include <inttypes.h>
#include <semaphore>
#include <barrier>
#include <vector>
#include <unordered_map>
#include "inttypes.h"
#include <mutex>
#include <thread>
#include <condition_variable>
// which basic nail placement algorithms should be used
// #define grid
@ -27,10 +32,51 @@
class Main {
private:
/// indicator, which threads are busy
uint64_t m_busyFlag;
/// number of threads
int16_t m_numThreads;
/// worker threads
std::vector<std::thread> m_worker;
/// result from checkLine thread
int64_t* m_lineResult;
/// next target to process in thread pool
int16_t m_nextTarget;
/// result pool lock
std::mutex m_resultLock;
/// mutex for work notification
std::mutex m_workMutex;
/// work notification condition
std::condition_variable m_workNotification;
/// mutex for finished notification
std::mutex m_finishedMutex;
/// finished notification semaphore (released by a worker once all workers are idle, acquired by run())
std::binary_semaphore m_finishedNotification{0};
/// sync point for worker threads and main
std::barrier<void(*)()> m_syncPoint;
/// last position (number of nail)
int16_t m_lastPosition = 0;
/// definition of a point for our dwarn line vector
/// tasks left
int32_t m_tasksLeft;
/// running flag for threads
bool m_running;
/// list of nails
const std::vector<uint32_t>& m_nails;
/// definition of a point for our drawn line vector
struct Point {
/// x coordinate
uint16_t x;
@ -61,20 +107,20 @@ class Main {
/// penalty duplication factor
float m_duplicateFactor;
/// map of used paths
uint16_t *m_usedpaths;
/// number of nails
int16_t m_numberOfNails;
/// the actual weight function to calculate how off we are to the target
/// \param value current value
/// \param target desired target
/// \retval distance to target
inline int64_t weightFunction(int16_t value, int16_t target) {
inline int64_t weightFunction(int16_t value, int16_t target) const {
return (value - target) * (value - target);
}
/// swaps two numbers
/// \param a first number
/// \param b second number
@ -163,16 +209,13 @@ class Main {
return pil;
}
float checkLine(int16_t target) {
float checkLine(int16_t target) const {
if (m_lastPosition == target) return INT64_MAX;
/// the diff on current lastPosition -> target
int64_t testDiff = 0;
uint16_t src = std::min(m_lastPosition, target);
uint16_t dst = std::max(m_lastPosition, target);
td_linesFromSource::const_iterator lttIter = m_linesFromSource.find((src << 16) + dst);
/*if (lttIter == m_linesFromSource.end()) {
printf("itt fail %i %i\n", src, dst);
abort();
}*/
// calculate difference to target
// for each point
td_pointsInLine::const_iterator pilIter = lttIter->second.begin();
@ -196,27 +239,100 @@ class Main {
return testDiff * duplicatePenalty;
}
/// \brief worker thread main loop
///
/// Each round the worker meets the other workers and the main thread at
/// m_syncPoint, marks itself idle in m_busyFlag, sleeps on m_workNotification
/// and, once woken, pulls targets from m_nextTarget until m_tasksLeft is used
/// up, storing each checkLine() result in m_lineResult.
/// \param tid index of this worker, used as bit position in m_busyFlag (0..63)
void checkLines(int16_t tid) {
    // The mask must be built as a 64 bit value: "1 << tid" is evaluated as int,
    // which sign-extends for tid == 31 and is undefined for tid >= 32.
    const uint64_t myBit = uint64_t{1} << tid;
    bool first = true;
    while (m_running) {
        m_syncPoint.arrive_and_wait();
        // wait for notification
        {
            std::unique_lock<std::mutex> lock(m_workMutex);
            m_busyFlag &= ~myBit;
            if (!m_busyFlag) {
                // no worker is busy any more; skip the signal on the very first
                // pass because no work has been handed out yet
                if (!first) {
                    m_finishedNotification.release();
                } else {
                    first = false;
                }
            }
            // NOTE(review): wait() has no predicate, so a spurious wakeup or a
            // notify_all() issued before this thread gets here is not handled.
            m_workNotification.wait(lock);
            m_busyFlag |= myBit;
        }
        // did we wake up because of shutdown?
        if (!m_running) break;
        int16_t tasksLeft = 0;
        int16_t target = 0;
        do {  // as long as there is work
            {
                // fetch the next target and the remaining task count atomically
                std::unique_lock<std::mutex> lock(m_resultLock);
                target = m_nextTarget++;
                tasksLeft = m_tasksLeft--;
            }
            // several workers may overshoot the end of the list; ignore those
            if (target < m_numberOfNails) {
                m_lineResult[target] = checkLine(target);
            }
        } while (tasksLeft > 0);
    }
}
public:
/// \brief constructor, allocates the lookup buffers and starts the worker threads
/// \param nails vector with nail positions (stored by reference, must outlive this object)
/// \param duplicateFactor duplication penalty factor
/// \param numThreads requested number of worker threads, clamped to 1..64
Main(const std::vector<uint32_t>& nails, float duplicateFactor, int16_t numThreads)
    // barrier participants: every worker plus the thread driving run()
    : m_syncPoint((numThreads < 1 ? 1 : (numThreads > 64 ? 64 : numThreads)) + 1, [](){}),
      m_nails(nails) {
    // Same clamp as used for the barrier above. Zero workers (e.g. when
    // std::thread::hardware_concurrency() reports 0) would leave nobody to
    // release m_finishedNotification, and more than 64 workers do not fit
    // into the 64 bit m_busyFlag mask.
    m_numThreads = numThreads < 1 ? 1 : (numThreads > 64 ? 64 : numThreads);
    m_duplicateFactor = duplicateFactor;
    m_numberOfNails = static_cast<int16_t>(nails.size());
    // one result slot per target nail, preset to the worst possible value
    m_lineResult = static_cast<int64_t*>(malloc(sizeof(int64_t) * m_numberOfNails));
    for (int32_t i = 0; i < m_numberOfNails; ++i) {
        m_lineResult[i] = INT64_MAX;
    }
    // a lookup of used paths to count repeats, zero-initialised
    m_usedpaths = static_cast<uint16_t*>(
        calloc(static_cast<size_t>(m_numberOfNails) * m_numberOfNails, sizeof(uint16_t)));
    m_nextTarget = 0;
    m_imgWidth = 0;
    // must be set before the workers start, they test it in their loop condition
    m_running = true;
    m_targetState = nullptr;
    m_currentState = nullptr;
    m_tasksLeft = 0;
    m_busyFlag = 0;
    m_worker.reserve(m_numThreads);
    for (int16_t tid = 0; tid < m_numThreads; ++tid) {
        m_worker.emplace_back(&Main::checkLines, this, tid);
    }
    // wait until all threads are there
    m_syncPoint.arrive_and_wait();
}
/// destructor: asks the workers to stop, joins them and frees the result buffer
~Main() {
    // workers test this flag after waking up and in their loop condition
    m_running = false;
    {
        // wake every worker sleeping on the work notification
        std::lock_guard<std::mutex> guard(m_workMutex);
        m_workNotification.notify_all();
    }
    // one entry per started worker thread
    for (std::thread& worker : m_worker) {
        worker.join();
    }
    // only the per-target result buffer is released here
    free(m_lineResult);
}
/// \brief main function
/// \param resolutionX X resolution of internal image
/// \param resolutionY Y resolution of internal image
/// \param nail vector with nail positions
/// \param maxIter maximal number of iterations to run
/// \param duplicateFactor duplication penalty factor
/// \param lineColor line color to use
int run(const char* imageName, Magick::Image* img, uint16_t resolutionX, uint16_t resolutionY, int16_t requestedNumberOfNails, std::vector<uint32_t> nails, uint16_t maxIter, float duplicateFactor, uint8_t lineColor) {
m_duplicateFactor = duplicateFactor;
int16_t numberOfNails = nails.size();
printf("res: %ix%i nails: %i maxIter: %i duplicatePenalty %.1f color: %i\n", resolutionX, resolutionY, numberOfNails, maxIter, duplicateFactor, lineColor);
int run(const char* imageName, Magick::Image* img, uint16_t resolutionX, uint16_t resolutionY, int16_t requestedNumberOfNails, uint16_t maxIter, uint8_t lineColor) {
printf("res: %ix%i nails: %i maxIter: %i duplicatePenalty %.1f color: %i\n", resolutionX, resolutionY, m_numberOfNails, maxIter, m_duplicateFactor, lineColor);
// for time measurement
struct timeval tv1, tv2;
gettimeofday(&tv1, NULL);
for (uint16_t src = 0; src < numberOfNails; ++src) {
for (uint16_t dst = src + 1; dst < numberOfNails; ++dst) {
td_pointsInLine pointsInLine = drawAALine(nails[src] >> 16, nails[src] & 0xFFFF, nails[dst] >> 16, nails[dst] & 0xFFFF, lineColor);
for (uint16_t src = 0; src < m_numberOfNails; ++src) {
for (uint16_t dst = src + 1; dst < m_numberOfNails; ++dst) {
td_pointsInLine pointsInLine = drawAALine(m_nails[src] >> 16, m_nails[src] & 0xFFFF, m_nails[dst] >> 16, m_nails[dst] & 0xFFFF, lineColor);
m_linesFromSource.insert(std::make_pair((src << 16) + dst, pointsInLine));
}
}
@ -253,28 +369,21 @@ class Main {
std::vector<uint16_t> path;
// add start position
path.push_back(0);
m_numberOfNails = numberOfNails;
// a lookup of used paths to count repeats
m_usedpaths = reinterpret_cast<uint16_t*>(malloc(numberOfNails * numberOfNails * sizeof(uint16_t)));
bzero(m_usedpaths, numberOfNails * numberOfNails * sizeof(uint16_t));
/// last thread end position
m_lastPosition = 0;
/// storage for the current state (all previous drawn threads)
m_currentState = reinterpret_cast<int16_t*>(malloc(m_imgWidth * imgHeight * 2));
/// temp storage to save (current) best version, will be continously updated
int16_t* bestState = reinterpret_cast<int16_t*>(malloc(m_imgWidth * imgHeight * 2));
// clear states
uint32_t widthXheight = m_imgWidth * imgHeight;
for (uint32_t i = 0; i < widthXheight; ++i) {
m_currentState[i] = 255;
bestState[i] = 255;
}
// current iteration
uint32_t iter = 0;
// list of used nails with their counter
uint8_t usedPins[numberOfNails] = {0};
uint8_t usedPins[m_numberOfNails] = {0};
// number of continuous jump tries if we got stuck
uint16_t jumps = 0;
/// total diff from currentState to targetState
@ -284,8 +393,8 @@ class Main {
for (uint32_t i = 0; i < widthXheight; ++i) {
totalDiff += weightFunction(255, m_targetState[i]);
}
printf("start %li\n", totalDiff);
while ((iter < maxIter) && jumps*2 < numberOfNails) {
printf("start diff %li\n", totalDiff);
while ((iter < maxIter) && jumps*2 < m_numberOfNails) {
++iter;
#ifdef SANITYCHECK
int64_t sanity = 0;
@ -300,35 +409,39 @@ class Main {
#endif
/// current best difference
int64_t bestDiff = INT64_MAX;
/// compensated diff includes penality when reusing paths
int64_t compensatedBestDiff = INT64_MAX;
int16_t bestTarget = -1;
// printf("source %i\n", m_lastPosition); fflush(stdout);
for (int16_t target = 0; target < numberOfNails; ++target) {
{
std::unique_lock<std::mutex> lock(m_resultLock);
m_nextTarget = 0;
m_tasksLeft = m_numberOfNails;
}
// notify threads
{
std::unique_lock<std::mutex> lock(m_workMutex);
m_workNotification.notify_all();
}
m_syncPoint.arrive_and_wait();
// wait for threads, results are in m_lineResults
m_finishedNotification.acquire();
// search best result
for (int16_t target = 0; target < m_numberOfNails; ++target) {
if (target == m_lastPosition) continue;
int64_t diff = checkLine(target);
if (diff < bestDiff) {
if (m_lineResult[target] < bestDiff) {
bestTarget = target;
bestDiff = diff;
bestDiff = m_lineResult[target];
}
}
// printf("bestTarget %i diff %li\n", bestTarget, bestDiff);
if (bestDiff < 0) {
// apply current best
td_linesFromSource::const_iterator lttIter = m_linesFromSource.find((std::min(m_lastPosition, bestTarget) << 16) + std::max(m_lastPosition, bestTarget));
/*if (lttIter == m_linesFromSource.end()) {
printf("iter fail %i %i\n", m_lastPosition, bestTarget);
abort();
}*/
td_pointsInLine::const_iterator pilIter = lttIter->second.begin();
while (pilIter != lttIter->second.end()) {
uint16_t x = (*pilIter).x;
uint16_t y = (*pilIter).y;
int16_t sub = (*pilIter).color;
uint32_t index = y * m_imgWidth + x;
int16_t cur = m_currentState[index];
cur -= sub;
bestState[index] = cur;
m_currentState[index] -= sub;
++pilIter;
}
} else {
@ -339,7 +452,7 @@ class Main {
path.pop_back();
}
// select next target randomly (kind of, intentionally producing the same numbers)
bestTarget = random() % numberOfNails;
bestTarget = random() % m_numberOfNails;
path.push_back(bestTarget);
m_lastPosition = bestTarget;
++usedPins[bestTarget];
@ -359,16 +472,16 @@ class Main {
path.push_back(bestTarget);
// update used pins
++usedPins[bestTarget];
// update diff
totalDiff += bestDiff;
// progress report
if (iter % 100 == 0) {
printf("best %4i -> %4i(%4i, %4i) diff %9li (%12li) iter %5i path %i\n", m_lastPosition, bestTarget, nails[bestTarget] >> 16, nails[bestTarget] & 0xFFFF, compensatedBestDiff, totalDiff + bestDiff, iter, m_usedpaths[std::min(m_lastPosition, bestTarget) * m_numberOfNails + std::max(m_lastPosition, bestTarget)]);
printf("best %4i -> %4i(%4i, %4i) diff %12li iter %5i path %i\n", m_lastPosition, bestTarget, m_nails[bestTarget] >> 16, m_nails[bestTarget] & 0xFFFF,
totalDiff, iter, m_usedpaths[std::min(m_lastPosition, bestTarget) * m_numberOfNails + std::max(m_lastPosition, bestTarget)]);
}
// set new start position
m_lastPosition = bestTarget;
// update diff
totalDiff += bestDiff;
// update current state from best map
memcpy(m_currentState, bestState, widthXheight * 2);
}
printf("size %li\n", path.size());
// we are done, create output svg
@ -377,9 +490,9 @@ class Main {
uint32_t counter = 0;
for (uint16_t i : path) {
if ((counter & 255) == 0) {
fprintf(fh, "<path d=\"M%i %i", nails[i] >> 16, nails[i] & 0xffff);
fprintf(fh, "<path d=\"M%i %i", m_nails[i] >> 16, m_nails[i] & 0xffff);
} else {
fprintf(fh, "L%i %i", nails[i] >> 16, nails[i] & 0xffff);
fprintf(fh, "L%i %i", m_nails[i] >> 16, m_nails[i] & 0xffff);
}
if ((counter & 255) == 255) {
fprintf(fh, "\" />\n");
@ -392,12 +505,11 @@ class Main {
gettimeofday(&tv2, NULL);
float timeNeeded = (tv2.tv_sec - tv1.tv_sec) + (tv2.tv_usec - tv1.tv_usec) / 1000000.0;
fprintf(fh, "</g>\n");
fprintf(fh, "<text x=\"30\" y=\"10\" style=\"font-weight:bold;font-size:60px;font-family:'DejaVu Serif'\"><tspan x=\"30\" y=\"70\">%s %i %i</tspan><tspan x=\"70\" y=\"150\"> %i %i %.2f %i</tspan><tspan x=\"30\" y=\"%i\">%i nails, %li paths, %.1f sec</tspan></text>", imageName, resolutionX, resolutionY, requestedNumberOfNails, maxIter, duplicateFactor, lineColor, imgHeight - 30, numberOfNails, path.size(), timeNeeded);
fprintf(fh, "<text x=\"30\" y=\"10\" style=\"font-weight:bold;font-size:60px;font-family:'DejaVu Serif'\"><tspan x=\"30\" y=\"70\">%s %i %i</tspan><tspan x=\"70\" y=\"150\"> %i %i %.2f %i</tspan><tspan x=\"30\" y=\"%i\">%i nails, %li paths, %.1f sec</tspan></text>", imageName, resolutionX, resolutionY, requestedNumberOfNails, maxIter, m_duplicateFactor, lineColor, imgHeight - 30, m_numberOfNails, path.size(), timeNeeded);
fprintf(fh, "</svg>");
fclose(fh);
// cleanup
free(m_targetState);
free(bestState);
free(m_currentState);
free(m_usedpaths);
path.clear();
@ -417,7 +529,7 @@ int main(int argc, char* argv[]) {
const char* imageName = argv[1];
uint16_t resolutionX = atoi(argv[2]);
uint16_t resolutionY = atoi(argv[3]);
uint16_t numberOfNails = atoi(argv[4]);
uint16_t requestedNumberOfNails = atoi(argv[4]);
uint16_t maxIter = atoi(argv[5]);
float duplicateFactor = atof(argv[6]);
uint8_t lineColor = atoi(argv[7]);
@ -441,14 +553,14 @@ int main(int argc, char* argv[]) {
// position nails
#ifdef circle
for (uint16_t i = 0; i < numberOfNails; ++i) {
float x = sin(2.0 * M_PI * i / numberOfNails) * (imgWidth-1) / 2.0 + imgWidth / 2.0;
float y = cos(2.0 * M_PI * i / numberOfNails) * (imgHeight-1) / 2.0 + imgHeight / 2.0;
for (uint16_t i = 0; i < requestedNumberOfNails; ++i) {
float x = sin(2.0 * M_PI * i / requestedNumberOfNails) * (imgWidth-1) / 2.0 + imgWidth / 2.0;
float y = cos(2.0 * M_PI * i / requestedNumberOfNails) * (imgHeight-1) / 2.0 + imgHeight / 2.0;
nails.push_back((static_cast<uint32_t>(floor(x)) << 16) + static_cast<uint16_t>(floor(y)));
}
#endif
#ifdef multicircle
uint16_t count = numberOfNails/1.5;
uint16_t count = requestedNumberOfNails/1.5;
for (uint16_t i = 0; i < count; ++i) {
float x = sin(2.0 * M_PI * i / count) * (imgWidth-1) / 2.0 + imgWidth / 2.0;
float y = cos(2.0 * M_PI * i / count) * (imgHeight-1) / 2.0 + imgHeight / 2.0;
@ -456,7 +568,7 @@ int main(int argc, char* argv[]) {
}
uint16_t width = imgWidth / 1.2 - 1;
uint16_t height = imgHeight / 1.2 - 1;
count = numberOfNails/1.5;
count = requestedNumberOfNails / 1.5;
for (uint16_t i = 0; i < count; ++i) {
float x = sin(2.0 * M_PI * i / count) * (width-1) / 2.0 + imgWidth / 2.0;
float y = cos(2.0 * M_PI * i / count) * (height-1) / 2.0 + imgHeight / 2.0;
@ -464,7 +576,7 @@ int main(int argc, char* argv[]) {
}
width = imgWidth / 1.5 - 1;
height = imgHeight / 1.5 - 1;
count = numberOfNails/2;
count = requestedNumberOfNails / 2;
for (uint16_t i = 0; i < count; ++i) {
float x = sin(2.0 * M_PI * i / count) * (width-1) / 2.0 + imgWidth / 2.0;
float y = cos(2.0 * M_PI * i / count) * (height-1) / 2.0 + imgHeight / 2.0;
@ -472,7 +584,7 @@ int main(int argc, char* argv[]) {
}
width = imgWidth / 2 - 1;
height = imgHeight / 2 - 1;
count = numberOfNails/3;
count = requestedNumberOfNails / 3;
for (uint16_t i = 0; i < count; ++i) {
float x = sin(2.0 * M_PI * i / count) * (width-1) / 2.0 + imgWidth / 2.0;
float y = cos(2.0 * M_PI * i / count) * (height-1) / 2.0 + imgHeight / 2.0;
@ -480,7 +592,7 @@ int main(int argc, char* argv[]) {
}
width = imgWidth / 3 - 1;
height = imgHeight / 3 - 1;
count = numberOfNails/4;
count = requestedNumberOfNails / 4;
for (uint16_t i = 0; i < count; ++i) {
float x = sin(2.0 * M_PI * i / count) * (width-1) / 2.0 + imgWidth / 2.0;
float y = cos(2.0 * M_PI * i / count) * (height-1) / 2.0 + imgHeight / 2.0;
@ -488,7 +600,7 @@ int main(int argc, char* argv[]) {
}
width = imgWidth / 5 - 1;
height = imgHeight / 5 - 1;
count = numberOfNails / 6;
count = requestedNumberOfNails / 6;
for (uint16_t i = 0; i < count; ++i) {
float x = sin(2.0 * M_PI * i / count) * (width-1) / 2.0 + imgWidth / 2.0;
float y = cos(2.0 * M_PI * i / count) * (height-1) / 2.0 + imgHeight / 2.0;
@ -497,7 +609,7 @@ int main(int argc, char* argv[]) {
nails.push_back((static_cast<uint32_t>(floor(imgWidth / 2.0)) << 16) + static_cast<uint16_t>(floor(imgHeight / 2.0)));
#endif
#ifdef grid
uint8_t sq_pins = sqrt(numberOfNails);
uint8_t sq_pins = sqrt(requestedNumberOfNails);
float distX = static_cast<float>(imgWidth - 1) / (sq_pins - 1);
float distY = static_cast<float>(imgHeight - 1) / (sq_pins - 1);
for (uint16_t y = 0; y < sq_pins; ++y) {
@ -508,9 +620,7 @@ int main(int argc, char* argv[]) {
#endif
Main m;
m.run(imageName, &img, resolutionX, resolutionY, numberOfNails, nails, maxIter, duplicateFactor, lineColor);
Main m(nails, duplicateFactor, std::min(std::thread::hardware_concurrency(), 64U));
m.run(imageName, &img, resolutionX, resolutionY, requestedNumberOfNails, maxIter, lineColor);
Magick::TerminateMagick();
}