diff --git a/Makefile b/Makefile index fa13f6f20ddd894e0d7188f8fb9250837e9aabcf..2c0eeec1fbc29428fb222aa758f0b37744a1de70 100644 --- a/Makefile +++ b/Makefile @@ -4,23 +4,27 @@ SPAR_DATA=/mnt/c/Users/loffjh/Documents/wsl/spar-dev-data/bin/spar DEFS =-fpermissive -std=c++11 -O3 -Isrc/denoiser -FF_PATHS= -DBLOCKING_MODE -spar_ondemand -I/mnt/c/Users/loffjh/Documents/wsl/spar-dev-data/libraries -FF_LIB=-pthread +FF_PATHS=-I../spar-dev-data/libraries +FF_LIB=-lpthread OPENCV_PATH= -I/home/juniorloff/libs/opencv/include -L/home/juniorloff/libs/opencv/lib -I/usr/include/opencv4 OCV_FLAGS=`pkg-config --cflags --libs /home/juniorloff/libs/opencv/lib/pkgconfig/opencv4.pc` OPENCV_LIB=-lopencv_core -lopencv_highgui -lopencv_imgproc -lopencv_imgcodecs -lopencv_videoio +SPAR_FLAGS= -DBLOCKING_MODE -spar_ordered -spar_ondemand -spar_file -all: image_processing image_processing_spar image_processing_spar_data +all: image_processing image_processing_spar image_processing_spar_data image_processing_spar_data_manual image_processing: src/image_processing.cpp $(CPP) $(DEFS) $(OPENCV_PATH) $< -o $@ $(OPENCV_LIB) $(OCV_FLAGS) image_processing_spar: src/image_processing_spar.cpp - $(SPAR_CC) $(DEFS) $(OPENCV_PATH) -spar_file $< -o $@ -spar_print > spar_out.cpp $(OPENCV_LIB) $(OCV_FLAGS) + $(SPAR_CC) $(DEFS) $(OPENCV_PATH) $(SPAR_FLAGS) $< -o $@$(OPENCV_LIB) $(OCV_FLAGS) image_processing_spar_data: src/image_processing_spar_data.cpp - $(SPAR_DATA) $(DEFS) $(OPENCV_PATH) -spar_file $< -o $@ $(OPENCV_LIB) $(OCV_FLAGS) + $(SPAR_DATA) $(DEFS) $(OPENCV_PATH) $(SPAR_FLAGS) $< -o $@ -spar_print > spar_out.cpp $(OPENCV_LIB) $(OCV_FLAGS) + +image_processing_spar_data_manual: src/spar_out.cpp + $(CPP) $(DEFS) $(OPENCV_PATH) $(FF_PATHS) $< -o $@ $(OPENCV_LIB) $(FF_LIB) $(OCV_FLAGS) clean: - rm -rf image_processing image_processing_spar image_processing_spar_data *.txt + rm -rf image_processing image_processing_spar image_processing_spar_data *.txt \ No newline at end of file diff --git a/src/image_processing.cpp b/src/image_processing.cpp index b79801bf2bed1dc75f1e76b8245b98a4a077f39a..cfe1fdd79eb519550565071eb902f470421715cf 100644 --- a/src/image_processing.cpp +++ b/src/image_processing.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -47,6 +48,7 @@ #include using namespace std; +using namespace chrono; using namespace cv; #define TANH 1 @@ -73,7 +75,8 @@ int main(int argc, char **argv) { string video_dir = argv[1]; - VideoCapture video(video_dir); + VideoCapture video; + video.open(video_dir); if (!video.isOpened()) return -1; @@ -101,6 +104,7 @@ int main(int argc, char **argv) { // initialize the VideoWriter object oVideoWriter.open("output.mp4", cv::VideoWriter::fourcc('m','p','4','v'), fps, frameSize, false); + auto t_start = std::chrono::steady_clock::now(); // Stream while(1){ Mat frame; @@ -533,6 +537,11 @@ int main(int argc, char **argv) { oVideoWriter.write(frame_output); } } + + auto t_end = std::chrono::steady_clock::now(); + + std::cout << "Time (ms): " << std::chrono::duration_cast(t_end - t_start).count() << std::endl; + video.release(); oVideoWriter.release(); diff --git a/src/image_processing_spar.cpp b/src/image_processing_spar.cpp index 535a793ddd4934094e3ebd96f5fb90b6225ab1c3..b02852ca3ae5c5324943f90e0e6995c4101eb468 100644 --- a/src/image_processing_spar.cpp +++ b/src/image_processing_spar.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -47,6 +48,7 @@ #include using namespace std; +using namespace chrono; using namespace cv; #define TANH 1 @@ -73,7 +75,8 @@ int main(int argc, char **argv) { string video_dir = argv[1]; - VideoCapture video(video_dir); + VideoCapture video; + video.open(video_dir); if (!video.isOpened()) return -1; @@ -101,6 +104,7 @@ int main(int argc, char **argv) { // initialize the VideoWriter object oVideoWriter.open("output.mp4", cv::VideoWriter::fourcc('m','p','4','v'), fps, frameSize, false); + auto t_start = std::chrono::steady_clock::now(); // Stream [[spar::ToStream, spar::Input(video)]] while(1){ @@ -349,7 +353,7 @@ int main(int argc, char **argv) { } // Stage - [[spar::Stage, spar::Input(frame), spar::Output(frame)]] + [[spar::Stage, spar::Input(frame), spar::Output(frame), spar::Replicate()]] { // Input vector vec_denoiser(dWidth*dHeight); @@ -536,6 +540,11 @@ int main(int argc, char **argv) { oVideoWriter.write(frame_output); } } + + auto t_end = std::chrono::steady_clock::now(); + + std::cout << "Time (ms): " << std::chrono::duration_cast(t_end - t_start).count() << std::endl; + video.release(); oVideoWriter.release(); diff --git a/src/image_processing_spar_data.cpp b/src/image_processing_spar_data.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a4a5338f7881bacc374ad92fa917d42dd38cb16a --- /dev/null +++ b/src/image_processing_spar_data.cpp @@ -0,0 +1,564 @@ + +// Neural Network - https://github.com/ralampay/ann +// Denoiser - https://github.com/fastflow/fastflow/tree/fully-c%2B%2B11/examples/denoiser +// Sobel - https://soubhihadri.medium.com/image-processing-best-practices-c-part-2-c0988b2d3e0c + + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "Matrix.cpp" +#include "Neuron.cpp" +#include "Layer.cpp" +#include "utils/Math.cpp" +#include "utils/Misc.cpp" +#include "neural_network/NeuralNetwork.cpp" +#include "neural_network/backPropagation.cpp" +#include "neural_network/feedForward.cpp" +#include "neural_network/setErrors.cpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace std; +using namespace chrono; +using namespace cv; + +#define TANH 1 +#define RELU 2 +#define SIGM 3 + +//global variables +VideoWriter oVideoWriter; +int num_frames; +int dWidth; +int dHeight ; +int fps; +int total_frames; +NeuralNetwork * n; +Size frameSize; + +int main(int argc, char **argv) { + + + if(argc < 2) { + cout << "Usage: ./bin video_dir" << endl; + return 1; + } + + string video_dir = argv[1]; + + VideoCapture video; + video.open(video_dir); + + if (!video.isOpened()) return -1; + + // initialize video variables + num_frames = 0; + dWidth = video.get(CAP_PROP_FRAME_WIDTH); + dHeight = video.get(CAP_PROP_FRAME_HEIGHT); + fps = video.get(CAP_PROP_FPS); + total_frames = video.get(CAP_PROP_FRAME_COUNT); + frameSize = Size(static_cast(dWidth), static_cast(dHeight)); + + // initialize neural network parameters + vector topology; + topology.push_back(dWidth*dHeight); + topology.push_back(100); + topology.push_back(100); + topology.push_back(dWidth*dHeight); + double learningRate = 0.05; + double momentum = 1; + double bias = 1; + + // initialize neural network + n = new NeuralNetwork(topology, RELU, SIGM, TANH, bias, learningRate, momentum); + + // initialize the VideoWriter object + oVideoWriter.open("output.mp4", cv::VideoWriter::fourcc('m','p','4','v'), fps, frameSize, false); + + auto t_start = std::chrono::steady_clock::now(); + // Stream + [[spar::ToStream, spar::Input(video)]] + while(1){ + Mat frame; + video >> frame; + + if (frame.empty()) + break; + + cvtColor(frame,frame,COLOR_RGB2GRAY); + frame.convertTo(frame, CV_64FC1); + + + // Stage + [[spar::Stage, spar::Input(frame), spar::Output(frame)]] + { + // Input + vector vec_neural_network((dWidth*dHeight)); + memcpy(vec_neural_network.data(), frame.data, dWidth*dHeight*sizeof(double)); + + // Computation + for (int step = 0; step < 10; step++) { + n->setCurrentInput(vec_neural_network); + n->setCurrentTarget(vec_neural_network); + + // feedForward + Matrix *a; // Matrix of neurons to the left + Matrix *b; // Matrix of weights to the right of layer + Matrix *c; // Matrix of neurons to the next layer + + for(int i = 0; i < (n->topologySize - 1); i++) { + a = n->getNeuronMatrix(i); + b = n->getWeightMatrix(i); + c = new Matrix( + a->getNumRows(), + b->getNumCols(), + false + ); + + if(i != 0) { + a = n->getActivatedNeuronMatrix(i); + } + + // matrix mult + for(int i = 0; i < a->getNumRows(); i++) { + [[spar::Pure]] + for(int j = 0; j < b->getNumCols(); j++) { + for(int k = 0; k < b->getNumRows(); k++) { + double p = a->getValue(i, k) * b->getValue(k, j); + double newVal = c->getValue(i, j) + p; + c->setValue(i, j, newVal); + } + } + } + + for(int c_index = 0; c_index < c->getNumCols(); c_index++) { + n->setNeuronValue(i + 1, c_index, c->getValue(0, c_index) + n->bias); + } + + delete a; + delete b; + delete c; + } + + + n->setErrors(); + // backPropagation + vector newWeights; + Matrix *deltaWeights; + Matrix *gradients; + Matrix *derivedValues; + Matrix *gradientsTransposed; + Matrix *zActivatedVals; + Matrix *tempNewWeights; + Matrix *pGradients; + Matrix *transposedPWeights; + Matrix *hiddenDerived; + Matrix *transposedHidden; + + // PART 1: OUTPUT TO LAST HIDDEN LAYER + int indexOutputLayer = n->topology.size() - 1; + + gradients = new Matrix( + 1, + n->topology.at(indexOutputLayer), + false + ); + + derivedValues = n->layers.at(indexOutputLayer)->matrixifyDerivedVals(); + + for(int i = 0; i < n->topology.at(indexOutputLayer); i++) { + [[spar::Pure]] + { + double e = n->derivedErrors.at(i); + double y = derivedValues->getValue(0, i); + double g = e * y; + gradients->setValue(0, i, g); + } + } + + // Gt * Z + gradientsTransposed = gradients->transpose(); + zActivatedVals = n->layers.at(indexOutputLayer - 1)->matrixifyActivatedVals(); + + deltaWeights = new Matrix( + gradientsTransposed->getNumRows(), + zActivatedVals->getNumCols(), + false + ); + + // matrix mult + for(int i = 0; i < gradientsTransposed->getNumRows(); i++) { + [[spar::Pure]] + for(int j = 0; j < zActivatedVals->getNumCols(); j++) { + for(int k = 0; k < zActivatedVals->getNumRows(); k++) { + double p = gradientsTransposed->getValue(i, k) * zActivatedVals->getValue(k, j); + double newVal = deltaWeights->getValue(i, j) + p; + deltaWeights->setValue(i, j, newVal); + } + } + } + + // COMPUTE FOR NEW WEIGHTS (LAST HIDDEN <-> OUTPUT) + tempNewWeights = new Matrix( + n->topology.at(indexOutputLayer - 1), + n->topology.at(indexOutputLayer), + false + ); + + for(int r = 0; r < n->topology.at(indexOutputLayer - 1); r++) { + [[spar::Pure]] + for(int c = 0; c < n->topology.at(indexOutputLayer); c++) { + + double originalValue = n->weightMatrices.at(indexOutputLayer - 1)->getValue(r, c); + double deltaValue = deltaWeights->getValue(c, r); + + originalValue = n->momentum * originalValue; + deltaValue = n->learningRate * deltaValue; + + tempNewWeights->setValue(r, c, (originalValue - deltaValue)); + } + } + + newWeights.push_back(tempNewWeights); + + delete gradientsTransposed; + delete zActivatedVals; + delete deltaWeights; + delete derivedValues; + + // PART 2: LAST HIDDEN LAYER TO INPUT LAYER + for(int i = (indexOutputLayer - 1); i > 0; i--) { + pGradients = gradients; + + transposedPWeights = n->weightMatrices.at(i)->transpose(); + + gradients = new Matrix( + pGradients->getNumRows(), + transposedPWeights->getNumCols(), + false + ); + + // matrix mult + for(int i = 0; i < pGradients->getNumRows(); i++) { + [[spar::Pure]] + for(int j = 0; j < transposedPWeights->getNumCols(); j++) { + for(int k = 0; k < transposedPWeights->getNumRows(); k++) { + double p = pGradients->getValue(i, k) * transposedPWeights->getValue(k, j); + double newVal = gradients->getValue(i, j) + p; + gradients->setValue(i, j, newVal); + } + } + } + + + hiddenDerived = n->layers.at(i)->matrixifyDerivedVals(); + + for(int colCounter = 0; colCounter < hiddenDerived->getNumCols(); colCounter++) { + [[spar::Pure]] + { + double g = gradients->getValue(0, colCounter) * hiddenDerived->getValue(0, colCounter); + gradients->setValue(0, colCounter, g); + } + } + + if(i == 1) { + zActivatedVals = n->layers.at(0)->matrixifyVals(); + } else { + zActivatedVals = n->layers.at(i-1)->matrixifyActivatedVals(); + } + + transposedHidden = zActivatedVals->transpose(); + + deltaWeights = new Matrix( + transposedHidden->getNumRows(), + gradients->getNumCols(), + false + ); + + // matrix mult + for(int i = 0; i < transposedHidden->getNumRows(); i++) { + [[spar::Pure]] + for(int j = 0; j < gradients->getNumCols(); j++) { + for(int k = 0; k < gradients->getNumRows(); k++) { + double p = transposedHidden->getValue(i, k) * gradients->getValue(k, j); + double newVal = deltaWeights->getValue(i, j) + p; + deltaWeights->setValue(i, j, newVal); + } + } + } + // update weights + tempNewWeights = new Matrix( + n->weightMatrices.at(i - 1)->getNumRows(), + n->weightMatrices.at(i - 1)->getNumCols(), + false + ); + + for(int r = 0; r < tempNewWeights->getNumRows(); r++) { + [[spar::Pure]] + for(int c = 0; c < tempNewWeights->getNumCols(); c++) { + double originalValue = n->weightMatrices.at(i - 1)->getValue(r, c); + double deltaValue = deltaWeights->getValue(r, c); + + originalValue = n->momentum * originalValue; + deltaValue = n->learningRate * deltaValue; + + tempNewWeights->setValue(r, c, (originalValue - deltaValue)); + } + } + + newWeights.push_back(tempNewWeights); + + delete pGradients; + delete transposedPWeights; + delete hiddenDerived; + delete zActivatedVals; + delete transposedHidden; + delete deltaWeights; + } + delete gradients; + + for(int i = 0; i < n->weightMatrices.size(); i++) { + delete n->weightMatrices[i]; + } + + n->weightMatrices.clear(); + + reverse(newWeights.begin(), newWeights.end()); + + for(int i = 0; i < newWeights.size(); i++) { + n->weightMatrices.push_back(newWeights[i]); + } + } + + // Output + frame = Mat(frameSize, CV_64FC1, n->layers.at(n->layers.size()-1)->getVals().data()); + frame.convertTo(frame, CV_8UC1); + } + + // Stage + [[spar::Stage, spar::Input(frame), spar::Output(frame), spar::Replicate()]] + { + // Input + vector vec_denoiser(dWidth*dHeight); + memcpy(vec_denoiser.data(), frame.data, dWidth*dHeight*sizeof(unsigned char)); + + // Computation + Detector * detector = new DetectorGaussian(dHeight, dWidth, false); + detector->init(vec_denoiser.data()); + + int * noisyMap = (int *) malloc(dHeight * dWidth * sizeof(int)); + for (unsigned int ri = 0; ri < dHeight; ++ri){ + for (unsigned int ci = 0, x = ri * dWidth; ci < dWidth; ++ci, ++x){ + noisyMap[x] = (detector->isPixelNoisy(vec_denoiser.data(), vec_denoiser.at(x), ri, ci)) ? vec_denoiser.at(x) : -1; + + + } + } + + //array of noisy pixels + unsigned int n_noisy = 0; + for (unsigned int ri = 0; ri < dHeight; ++ri) + for (unsigned int ci = 0, x = ri * dWidth; ci < dWidth; ++ci, ++x) + if (noisyMap[x] >= 0) + ++n_noisy; + unsigned int * noisyPixels = (unsigned int *) malloc(n_noisy * sizeof(unsigned int)); + for (unsigned int i = 0, ri = 0; ri < dHeight; ++ri) + for (unsigned int ci = 0, x = ri * dWidth; ci < dWidth; ++ci, ++x) + if (noisyMap[x] >= 0) + noisyPixels[i++] = x; + + unsigned char * restore_denoiser = (unsigned char *) malloc(dHeight * dWidth * sizeof(unsigned char)); + unsigned char * diff = (unsigned char *) malloc(n_noisy * sizeof(unsigned char)); + + memset(diff, 0, n_noisy * sizeof(unsigned char)); + memcpy(restore_denoiser, vec_denoiser.data(), dHeight * dWidth * sizeof(unsigned char)); + + float *residuals = (float *) malloc(n_noisy * sizeof(float)); + bool fixed = false; + unsigned int restore_cycles = 0; + float alfa = 1.3; + float beta = 5; + bool fixed_cycles = false; + unsigned int max_cycles = 200; + while (true) { + //restore + for (unsigned int i = 0; i < n_noisy; ++i) { + unsigned int x = noisyPixels[i]; + int idx = x; + + //get the pixel and the 8 closest + unsigned char pixel = vec_denoiser.at(idx); + //up + int idx_neighbor = idx - dWidth * (idx >= dWidth); + unsigned char up_val = vec_denoiser.at(idx_neighbor); + unsigned char up_noisy = (noisyMap[idx_neighbor] >= 0); + //down + idx_neighbor = idx + dWidth * (idx < ((dHeight - 1) * dWidth)); + unsigned char down_val = vec_denoiser.at(idx_neighbor); + unsigned char down_noisy = (noisyMap[idx_neighbor] >= 0); + //left + idx_neighbor = idx - ((idx % dWidth) > 0); + unsigned char left_val = vec_denoiser.at(idx_neighbor); + unsigned char left_noisy = (noisyMap[idx_neighbor] >= 0); + //right + idx_neighbor = idx + ((idx % dWidth) < (dWidth - 1)); + unsigned char right_val = vec_denoiser.at(idx_neighbor); + unsigned char right_noisy = (noisyMap[idx_neighbor] >= 0); + //up-left + idx_neighbor = idx - 1 - dWidth * (idx >= dWidth); + unsigned char upl_val = vec_denoiser.at(idx_neighbor); + unsigned char upl_noisy = (noisyMap[idx_neighbor] >= 0); + //up-right + idx_neighbor = idx + 1 - dWidth * (idx >= dWidth); + unsigned char upr_val = vec_denoiser.at(idx_neighbor); + unsigned char upr_noisy = (noisyMap[idx_neighbor] >= 0); + //down-left + idx_neighbor = idx - 1 + dWidth * (idx < ((dHeight - 1) * dWidth)); + unsigned char downl_val = vec_denoiser.at(idx_neighbor); + unsigned char downl_noisy = (noisyMap[idx_neighbor] >= 0); + //down-right + idx_neighbor = idx + 1 + dWidth * (idx < ((dHeight - 1) * dWidth)); + unsigned char downr_val = vec_denoiser.at(idx_neighbor); + unsigned char downr_noisy = (noisyMap[idx_neighbor] >= 0); + + //compute the correction + unsigned char u = 0; + float S; + float Fu, u_min = 0.0f, Fu_prec = FLT_MAX; // 256.0f; + float beta_ = beta; // / 2; + for (int uu = 0; uu < 256; ++uu) { + u = (unsigned char) uu; + Fu = 0.0f; + S = 0.0f; + S += (float) (2 - up_noisy) * sqrt(_ABS((int) u - (int) up_val) * _ABS((int) u - (int) up_val) + alfa); + S += (float) (2 - down_noisy) * sqrt(_ABS(((int) u - (int) down_val)) * _ABS(((int) u - (int) down_val)) + alfa); + S += (float) (2 - left_noisy) * sqrt(_ABS(((int) u - (int) left_val)) * _ABS(((int) u - (int) left_val)) + alfa); + S += (float) (2 - right_noisy) * sqrt(_ABS(((int) u - (int) right_val)) * _ABS(((int) u - (int) right_val)) + alfa); + S += (float) (2 - upl_noisy) * sqrt(_ABS((int) u - (int) upl_val) * _ABS((int) u - (int) upl_val) + alfa); + S += (float) (2 - upr_noisy) * sqrt(_ABS((int) u - (int) upr_val) * _ABS((int) u - (int) upr_val) + alfa); + S += (float) (2 - downl_noisy) * sqrt(_ABS(((int) u - (int) downl_val)) * _ABS(((int) u - (int) downl_val)) + alfa); + S += (float) (2 - downr_noisy) * sqrt(_ABS(((int) u - (int) downr_val)) * _ABS(((int) u - (int) downr_val)) + alfa); + + Fu = ((float) _ABS(u - pixel) + (beta_ * S)); + if (Fu < Fu_prec) { + u_min = u; + Fu_prec = Fu; + } + } + restore_denoiser[x] = (unsigned char) (u_min + 0.5f); //round + + unsigned char newdiff = (unsigned char) (_ABS((int) (restore_denoiser[x]) - noisyMap[x])); + residuals[i] = (float) (_ABS((int) newdiff - (int) (diff[i]))); + diff[i] = newdiff; + } + //reduce residuals + float residual = 0.0f; + for (unsigned int i = 0; i < n_noisy; ++i) + residual += residuals[i]; + residual /= n_noisy; + ++restore_cycles; + //check convergence + if (fixed_cycles) + fixed = restore_cycles == max_cycles; + else + fixed = residual < RESIDUAL_THRESHOLD || restore_cycles >= max_cycles; + if (fixed) + break; + } + + //clean-up + free(noisyPixels); + free(diff); + free(residuals); + free(noisyMap); + delete detector; + + // Output + frame = Mat(frameSize, CV_8UC1, restore_denoiser); + } + // Stage + [[spar::Stage, spar::Input(frame), spar::Replicate()]] + { + + // Input + Mat frame_output = Mat(frameSize, CV_8UC1); + + // Computation + { + std::vector kernel_x({1,2,1,0,0,0,-1,-2,-1}); + std::vector kernel_y({1,0,-1,2,0,-2,1,0,-1}); + int kernel_size = 3; + + unsigned char *data_in = (unsigned char*)(frame.data); + unsigned char *data_out = (unsigned char*)(frame_output.data); + + for (int row = 0; row < dHeight; ++row) { + for (int col = 0; col < dWidth ; col += 1) { + + if (row <= kernel_size/2 || row >= dHeight-kernel_size/2 || + col <= kernel_size/2 || col >= dWidth-kernel_size/2){ + data_out[frame_output.step*row+col] = 0; + continue; + } + + int sum_x = 0, sum_y = 0; + int k_ind = 0; + for (int k_row = -kernel_size/2; k_row <= kernel_size/2; ++k_row) { + for (int k_col = -kernel_size/2; k_col <= kernel_size/2; ++k_col) { + sum_x += kernel_x[k_ind]*data_in[frame.step*(row+k_row)+col+k_col]; + sum_y += kernel_y[k_ind]*data_in[frame.step*(row+k_row)+col+k_col]; + k_ind++; + } + } + int G = unsigned(std::sqrt(sum_x*sum_x+sum_y*sum_y)); + data_out[frame_output.step*row+col] = std::min(G,255); + } + } + } + + // Output + frame_output.convertTo(frame_output, CV_8UC1); + + num_frames++; + oVideoWriter.write(frame_output); + } + } + + auto t_end = std::chrono::steady_clock::now(); + + std::cout << "Time (ms): " << std::chrono::duration_cast(t_end - t_start).count() << std::endl; + + video.release(); + oVideoWriter.release(); + + return 0; +} \ No newline at end of file diff --git a/src/spar_out.cpp b/src/spar_out.cpp new file mode 100644 index 0000000000000000000000000000000000000000..4358307cea2993b156b9e385458c94cd9c7e703e --- /dev/null +++ b/src/spar_out.cpp @@ -0,0 +1,610 @@ + +#include + +#include + +#include + +#include + +#include + +#include + +#include + +#include + +#include + +#include + +#include + +#include + +#include + +#include + +#include "Matrix.cpp" + +#include "Neuron.cpp" + +#include "Layer.cpp" + +#include "utils/Math.cpp" + +#include "utils/Misc.cpp" + +#include "neural_network/NeuralNetwork.cpp" + +#include "neural_network/backPropagation.cpp" + +#include "neural_network/feedForward.cpp" + +#include "neural_network/setErrors.cpp" + +#include + +#include + +#include + +#include + +#include + +#include + +#include + +#include + +#include + +#include + +#include + +#include + +#include + +#include + +#include + +#include + +using namespace std; +using namespace chrono; +using namespace cv; +#define TANH 1 + +#define RELU 2 + +#define SIGM 3 + +VideoWriter oVideoWriter; +int num_frames; +int dWidth; +int dHeight; +int fps; +int total_frames; +NeuralNetwork * n; +Size frameSize; +#include "ff/ff.hpp" + +#include "ff/pipeline.hpp" + +#include "ff/farm.hpp" + +#include "ff/parallel_for.hpp" + +using namespace ff; +namespace spar{ + static inline ssize_t get_mac_core() { + ssize_t n = 1; + FILE * f; + f = popen("cat /proc/cpuinfo |grep processor | wc -l","r"); + if(fscanf(f,"%ld",& n) == EOF) + { + pclose (f); + return n; + } + pclose (f); + return n; + } + static inline ssize_t get_env_num_workers(int level) { + ssize_t n = 1; + FILE * f; + if(level == 1) + f = popen("echo $SPAR_NUM_WORKERS","r"); + if(level == 2) + f = popen("echo $SPAR_NUM_WORKERS2","r"); + if(fscanf(f,"%ld",& n) == EOF) + { + pclose (f); + return n; + } + pclose (f); + return n; + } + static inline ssize_t get_Num_Workers() { + ssize_t w_size = get_env_num_workers(1); + if(w_size > 0) + { + return w_size; + } + return get_mac_core(); + } + static inline ssize_t get_Num_Workers2() { + ssize_t w_size = get_env_num_workers(2); + if(w_size > 0) + { + return w_size; + } + return get_mac_core(); + } +} +struct image_processing_spar_data_struct_spar0{ + image_processing_spar_data_struct_spar0() { + } + image_processing_spar_data_struct_spar0(Mat frame) : frame(frame) { + } + ; + Mat frame; +}; +ff::ParallelFor * * combined_spar_pf; +ff::ParallelFor * spar_pf; +struct image_processing_spar_data_Stage_spar00 : ff_node_t < image_processing_spar_data_struct_spar0 >{ + Mat frame; + image_processing_spar_data_struct_spar0 * svc(image_processing_spar_data_struct_spar0 * image_processing_spar_data_Input_spar) { + { + vector < double > vec_neural_network ((dWidth*dHeight)); + memcpy(vec_neural_network.data(),image_processing_spar_data_Input_spar -> frame.data,dWidth*dHeight*sizeof(double)); + + for(int step = 0; step < 10;step++) + { + n -> setCurrentInput(vec_neural_network); + n -> setCurrentTarget(vec_neural_network); + Matrix * a; + Matrix * b; + Matrix * c; + + for(int i = 0; i < (n -> topologySize-1);i++) + { + a = n -> getNeuronMatrix(i); + b = n -> getWeightMatrix(i); + c = new Matrix (a -> getNumRows(),b -> getNumCols(),false); + if(i != 0) + { + a = n -> getActivatedNeuronMatrix(i); + } + combined_spar_pf[0] -> parallel_for(0,a -> getNumRows()+0,1,[&] (int i) { + + for(int j = 0; j < b -> getNumCols();j++) + { + + for(int k = 0; k < b -> getNumRows();k++) + { + double p = a -> getValue(i,k)*b -> getValue(k,j); + double newVal = c -> getValue(i,j)+p; + c -> setValue(i,j,newVal); + } + } + }); + + for(int c_index = 0; c_index < c -> getNumCols();c_index++) + { + n -> setNeuronValue(i+1,c_index,c -> getValue(0,c_index)+n -> bias); + } + delete a; + delete b; + delete c; + } + n -> setErrors(); + vector < Matrix * > newWeights; + Matrix * deltaWeights; + Matrix * gradients; + Matrix * derivedValues; + Matrix * gradientsTransposed; + Matrix * zActivatedVals; + Matrix * tempNewWeights; + Matrix * pGradients; + Matrix * transposedPWeights; + Matrix * hiddenDerived; + Matrix * transposedHidden; + int indexOutputLayer = n -> topology.size()-1; + gradients = new Matrix (1,n -> topology.at(indexOutputLayer),false); + derivedValues = n -> layers.at(indexOutputLayer) -> matrixifyDerivedVals(); + combined_spar_pf[0] -> parallel_for(0,n -> topology.at(indexOutputLayer)+0,1,[&] (int i) { + { + double e = n -> derivedErrors.at(i); + double y = derivedValues -> getValue(0,i); + double g = e*y; + gradients -> setValue(0,i,g); + } + }); + gradientsTransposed = gradients -> transpose(); + zActivatedVals = n -> layers.at(indexOutputLayer-1) -> matrixifyActivatedVals(); + deltaWeights = new Matrix (gradientsTransposed -> getNumRows(),zActivatedVals -> getNumCols(),false); + combined_spar_pf[0] -> parallel_for(0,gradientsTransposed -> getNumRows()+0,1,[&] (int i) { + + for(int j = 0; j < zActivatedVals -> getNumCols();j++) + { + + for(int k = 0; k < zActivatedVals -> getNumRows();k++) + { + double p = gradientsTransposed -> getValue(i,k)*zActivatedVals -> getValue(k,j); + double newVal = deltaWeights -> getValue(i,j)+p; + deltaWeights -> setValue(i,j,newVal); + } + } + }); + tempNewWeights = new Matrix (n -> topology.at(indexOutputLayer-1),n -> topology.at(indexOutputLayer),false); + combined_spar_pf[0] -> parallel_for(0,n -> topology.at(indexOutputLayer-1)+0,1,[&] (int r) { + + for(int c = 0; c < n -> topology.at(indexOutputLayer);c++) + { + double originalValue = n -> weightMatrices.at(indexOutputLayer-1) -> getValue(r,c); + double deltaValue = deltaWeights -> getValue(c,r); + originalValue = n -> momentum*originalValue; + deltaValue = n -> learningRate*deltaValue; + tempNewWeights -> setValue(r,c,(originalValue-deltaValue)); + } + }); + newWeights.push_back(tempNewWeights); + delete gradientsTransposed; + delete zActivatedVals; + delete deltaWeights; + delete derivedValues; + + for(int i = (indexOutputLayer-1); i > 0;i--) + { + pGradients = gradients; + transposedPWeights = n -> weightMatrices.at(i) -> transpose(); + gradients = new Matrix (pGradients -> getNumRows(),transposedPWeights -> getNumCols(),false); + combined_spar_pf[0] -> parallel_for(0,pGradients -> getNumRows()+0,1,[&] (int i) { + + for(int j = 0; j < transposedPWeights -> getNumCols();j++) + { + + for(int k = 0; k < transposedPWeights -> getNumRows();k++) + { + double p = pGradients -> getValue(i,k)*transposedPWeights -> getValue(k,j); + double newVal = gradients -> getValue(i,j)+p; + gradients -> setValue(i,j,newVal); + } + } + }); + hiddenDerived = n -> layers.at(i) -> matrixifyDerivedVals(); + combined_spar_pf[0] -> parallel_for(0,hiddenDerived -> getNumCols()+0,1,[&] (int colCounter) { + { + double g = gradients -> getValue(0,colCounter)*hiddenDerived -> getValue(0,colCounter); + gradients -> setValue(0,colCounter,g); + } + }); + if(i == 1) + { + zActivatedVals = n -> layers.at(0) -> matrixifyVals(); + } else + { + zActivatedVals = n -> layers.at(i-1) -> matrixifyActivatedVals(); + } + transposedHidden = zActivatedVals -> transpose(); + deltaWeights = new Matrix (transposedHidden -> getNumRows(),gradients -> getNumCols(),false); + combined_spar_pf[0] -> parallel_for(0,transposedHidden -> getNumRows()+0,1,[&] (int i) { + + for(int j = 0; j < gradients -> getNumCols();j++) + { + + for(int k = 0; k < gradients -> getNumRows();k++) + { + double p = transposedHidden -> getValue(i,k)*gradients -> getValue(k,j); + double newVal = deltaWeights -> getValue(i,j)+p; + deltaWeights -> setValue(i,j,newVal); + } + } + }); + tempNewWeights = new Matrix (n -> weightMatrices.at(i-1) -> getNumRows(),n -> weightMatrices.at(i-1) -> getNumCols(),false); + combined_spar_pf[0] -> parallel_for(0,tempNewWeights -> getNumRows()+0,1,[&] (int r) { + + for(int c = 0; c < tempNewWeights -> getNumCols();c++) + { + double originalValue = n -> weightMatrices.at(i-1) -> getValue(r,c); + double deltaValue = deltaWeights -> getValue(r,c); + originalValue = n -> momentum*originalValue; + deltaValue = n -> learningRate*deltaValue; + tempNewWeights -> setValue(r,c,(originalValue-deltaValue)); + } + }); + newWeights.push_back(tempNewWeights); + delete pGradients; + delete transposedPWeights; + delete hiddenDerived; + delete zActivatedVals; + delete transposedHidden; + delete deltaWeights; + } + delete gradients; + + for(int i = 0; i < n -> weightMatrices.size();i++) + { + delete n -> weightMatrices[i]; + } + n -> weightMatrices.clear(); + reverse(newWeights.begin(),newWeights.end()); + + for(int i = 0; i < newWeights.size();i++) + { + n -> weightMatrices.push_back(newWeights[i]); + } + } + image_processing_spar_data_Input_spar -> frame = Mat(frameSize,CV_64FC1,n -> layers.at(n -> layers.size()-1) -> getVals().data()); + image_processing_spar_data_Input_spar -> frame.convertTo(image_processing_spar_data_Input_spar -> frame,CV_8UC1); + } + ff_send_out (image_processing_spar_data_Input_spar); + return (image_processing_spar_data_struct_spar0 *)GO_ON; + } +}; +struct image_processing_spar_data_Stage_spar01 : ff_node_t < image_processing_spar_data_struct_spar0 >{ + Mat frame; + image_processing_spar_data_struct_spar0 * svc(image_processing_spar_data_struct_spar0 * image_processing_spar_data_Input_spar) { + { + vector < unsigned char > vec_denoiser(dWidth * dHeight); + memcpy(vec_denoiser.data(),image_processing_spar_data_Input_spar -> frame.data,dWidth*dHeight*sizeof(unsigned char)); + Detector * detector = new DetectorGaussian (dHeight,dWidth,false); + detector -> init(vec_denoiser.data()); + int * noisyMap = (int *)malloc(dHeight*dWidth*sizeof(int)); + + for(unsigned int ri = 0; ri < dHeight;++ri) + { + + for(unsigned int ci = 0,x = ri*dWidth; ci < dWidth;++ci,++x) + { + noisyMap[x] = (detector -> isPixelNoisy(vec_denoiser.data(),vec_denoiser.at(x),ri,ci)) ? vec_denoiser.at(x) : - 1; + } + } + unsigned int n_noisy = 0; + + for(unsigned int ri = 0; ri < dHeight;++ri) + + for(unsigned int ci = 0,x = ri*dWidth; ci < dWidth;++ci,++x) + if(noisyMap[x] >= 0) + ++n_noisy; + unsigned int * noisyPixels = (unsigned int *)malloc(n_noisy*sizeof(unsigned int)); + + for(unsigned int i = 0,ri = 0; ri < dHeight;++ri) + + for(unsigned int ci = 0,x = ri*dWidth; ci < dWidth;++ci,++x) + if(noisyMap[x] >= 0) + noisyPixels[i++] = x; + unsigned char * restore_denoiser = (unsigned char *)malloc(dHeight*dWidth*sizeof(unsigned char)); + unsigned char * diff = (unsigned char *)malloc(n_noisy*sizeof(unsigned char)); + memset(diff,0,n_noisy*sizeof(unsigned char)); + memcpy(restore_denoiser,vec_denoiser.data(),dHeight*dWidth*sizeof(unsigned char)); + float * residuals = (float *)malloc(n_noisy*sizeof(float)); + bool fixed = false; + unsigned int restore_cycles = 0; + float alfa = 1.3; + float beta = 5; + bool fixed_cycles = false; + unsigned int max_cycles = 200; + + while(true) + { + + for(unsigned int i = 0; i < n_noisy;++i) + { + unsigned int x = noisyPixels[i]; + int idx = x; + unsigned char pixel = vec_denoiser.at(idx); + int idx_neighbor = idx-dWidth*(idx >= dWidth); + unsigned char up_val = vec_denoiser.at(idx_neighbor); + unsigned char up_noisy = (noisyMap[idx_neighbor] >= 0); + idx_neighbor = idx+dWidth*(idx < ((dHeight-1)*dWidth)); + unsigned char down_val = vec_denoiser.at(idx_neighbor); + unsigned char down_noisy = (noisyMap[idx_neighbor] >= 0); + idx_neighbor = idx-((idx%dWidth) > 0); + unsigned char left_val = vec_denoiser.at(idx_neighbor); + unsigned char left_noisy = (noisyMap[idx_neighbor] >= 0); + idx_neighbor = idx+((idx%dWidth) < (dWidth-1)); + unsigned char right_val = vec_denoiser.at(idx_neighbor); + unsigned char right_noisy = (noisyMap[idx_neighbor] >= 0); + idx_neighbor = idx-1-dWidth*(idx >= dWidth); + unsigned char upl_val = vec_denoiser.at(idx_neighbor); + unsigned char upl_noisy = (noisyMap[idx_neighbor] >= 0); + idx_neighbor = idx+1-dWidth*(idx >= dWidth); + unsigned char upr_val = vec_denoiser.at(idx_neighbor); + unsigned char upr_noisy = (noisyMap[idx_neighbor] >= 0); + idx_neighbor = idx-1+dWidth*(idx < ((dHeight-1)*dWidth)); + unsigned char downl_val = vec_denoiser.at(idx_neighbor); + unsigned char downl_noisy = (noisyMap[idx_neighbor] >= 0); + idx_neighbor = idx+1+dWidth*(idx < ((dHeight-1)*dWidth)); + unsigned char downr_val = vec_denoiser.at(idx_neighbor); + unsigned char downr_noisy = (noisyMap[idx_neighbor] >= 0); + unsigned char u = 0; + float S; + float Fu,u_min = 0.0f,Fu_prec = FLT_MAX; + float beta_ = beta; + + for(int uu = 0; uu < 256;++uu) + { + u = (unsigned char)uu; + Fu = 0.0f; + S = 0.0f; + S += (float)(2-up_noisy)*sqrt(_ABS((int)u-(int)up_val)*_ABS((int)u-(int)up_val)+alfa); + S += (float)(2-down_noisy)*sqrt(_ABS(((int)u-(int)down_val))*_ABS(((int)u-(int)down_val))+alfa); + S += (float)(2-left_noisy)*sqrt(_ABS(((int)u-(int)left_val))*_ABS(((int)u-(int)left_val))+alfa); + S += (float)(2-right_noisy)*sqrt(_ABS(((int)u-(int)right_val))*_ABS(((int)u-(int)right_val))+alfa); + S += (float)(2-upl_noisy)*sqrt(_ABS((int)u-(int)upl_val)*_ABS((int)u-(int)upl_val)+alfa); + S += (float)(2-upr_noisy)*sqrt(_ABS((int)u-(int)upr_val)*_ABS((int)u-(int)upr_val)+alfa); + S += (float)(2-downl_noisy)*sqrt(_ABS(((int)u-(int)downl_val))*_ABS(((int)u-(int)downl_val))+alfa); + S += (float)(2-downr_noisy)*sqrt(_ABS(((int)u-(int)downr_val))*_ABS(((int)u-(int)downr_val))+alfa); + Fu = ((float)_ABS(u-pixel)+(beta_*S)); + if(Fu < Fu_prec) + { + u_min = u; + Fu_prec = Fu; + } + } + restore_denoiser[x] = (unsigned char)(u_min+0.5f); + unsigned char newdiff = (unsigned char)(_ABS((int)(restore_denoiser [x])- noisyMap[x])); + residuals[i] = (float)(_ABS((int)newdiff-(int)(diff[i]))); + diff[i] = newdiff; + } + float residual = 0.0f; + + for(unsigned int i = 0; i < n_noisy;++i) + residual += residuals[i]; + residual /= n_noisy; + ++restore_cycles; + if(fixed_cycles) + fixed = restore_cycles == max_cycles; else + fixed = residual < RESIDUAL_THRESHOLD || restore_cycles >= max_cycles; + if(fixed) + break; + } + free (noisyPixels); + free (diff); + free (residuals); + free (noisyMap); + delete detector; + image_processing_spar_data_Input_spar -> frame = Mat(frameSize,CV_8UC1,restore_denoiser); + } + ff_send_out (image_processing_spar_data_Input_spar); + return (image_processing_spar_data_struct_spar0 *)GO_ON; + } +}; +struct image_processing_spar_data_Stage_spar02 : ff_node_t < image_processing_spar_data_struct_spar0 >{ + image_processing_spar_data_struct_spar0 * svc(image_processing_spar_data_struct_spar0 * image_processing_spar_data_Input_spar) { + { + Mat frame_output = Mat(frameSize,CV_8UC1); + { + std::vector < int > kernel_x ({1,2,1,0,0,0,- 1,- 2,- 1 + }); + std::vector < int > kernel_y ({1,0,- 1,2,0,- 2,1,0,- 1 + }); + int kernel_size = 3; + unsigned char * data_in = (unsigned char *)(image_processing_spar_data_Input_spar -> frame.data); + unsigned char * data_out = (unsigned char *)(frame_output.data); + + for(int row = 0; row < dHeight;++row) + { + + for(int col = 0; col < dWidth;col += 1) + { + if(row <= kernel_size / 2 || row >= dHeight-kernel_size / 2 || col <= kernel_size / 2 || col >= dWidth-kernel_size / 2) + { + data_out[frame_output.step*row+col] = 0; + continue; + } + int sum_x = 0,sum_y = 0; + int k_ind = 0; + + for(int k_row = - kernel_size / 2; k_row <= kernel_size / 2;++k_row) + { + + for(int k_col = - kernel_size / 2; k_col <= kernel_size / 2;++k_col) + { + sum_x += kernel_x[k_ind]*data_in[image_processing_spar_data_Input_spar -> frame.step*(row+k_row)+col+k_col]; + sum_y += kernel_y[k_ind]*data_in[image_processing_spar_data_Input_spar -> frame.step*(row+k_row)+col+k_col]; + k_ind++; + } + } + int G = unsigned(std::sqrt(sum_x*sum_x+sum_y*sum_y)); + data_out[frame_output.step*row+col] = std::min(G,255); + } + } + } + frame_output.convertTo(frame_output,CV_8UC1); + num_frames++; + oVideoWriter.write(frame_output); + } + delete image_processing_spar_data_Input_spar; + return (image_processing_spar_data_struct_spar0 *)GO_ON; + } +}; +struct image_processing_spar_data_ToStream_spar0 : ff_node_t < image_processing_spar_data_struct_spar0 >{ + VideoCapture video; + image_processing_spar_data_struct_spar0 * svc(image_processing_spar_data_struct_spar0 * image_processing_spar_data_Input_spar) { + + while(1) + { + Mat frame; + video>>frame; + if(frame.empty()) + break; + cvtColor(frame,frame,COLOR_RGB2GRAY); + frame.convertTo(frame,CV_64FC1); + image_processing_spar_data_struct_spar0 * stream_spar = new image_processing_spar_data_struct_spar0 (frame); + ff_send_out (stream_spar); + ; + ; + } + return EOS; + } +}; +int main(int argc,char * * argv) { + image_processing_spar_data_ToStream_spar0 image_processing_spar_data_ToStream_spar0_call; + image_processing_spar_data_Stage_spar00 image_processing_spar_data_Stage_spar00_call; + std::vector < std::unique_ptr < ff_node > > image_processing_spar_data_Stage_spar01_workers (spar::get_Num_Workers()); + + for(unsigned int image_processing_spar_data_Stage_spar01_i = 0; image_processing_spar_data_Stage_spar01_i < spar::get_Num_Workers();++image_processing_spar_data_Stage_spar01_i) + image_processing_spar_data_Stage_spar01_workers[image_processing_spar_data_Stage_spar01_i] = std::unique_ptr < ff_node >(new image_processing_spar_data_Stage_spar01); + ff_OFarm < image_processing_spar_data_struct_spar0 > image_processing_spar_data_Stage_spar01_call (std::move(image_processing_spar_data_Stage_spar01_workers)); + image_processing_spar_data_Stage_spar01_call.add_emitter(image_processing_spar_data_Stage_spar00_call); + std::vector < std::unique_ptr < ff_node > > image_processing_spar_data_Stage_spar02_workers (spar::get_Num_Workers()); + + for(unsigned int image_processing_spar_data_Stage_spar02_i = 0; image_processing_spar_data_Stage_spar02_i < spar::get_Num_Workers();++image_processing_spar_data_Stage_spar02_i) + image_processing_spar_data_Stage_spar02_workers[image_processing_spar_data_Stage_spar02_i] = std::unique_ptr < ff_node >(new image_processing_spar_data_Stage_spar02); + ff_OFarm < image_processing_spar_data_struct_spar0 > image_processing_spar_data_Stage_spar02_call (std::move(image_processing_spar_data_Stage_spar02_workers)); + ff_Pipe < image_processing_spar_data_struct_spar0 > pipeline0(image_processing_spar_data_ToStream_spar0_call,image_processing_spar_data_Stage_spar01_call,image_processing_spar_data_Stage_spar02_call); + combined_spar_pf = new ff::ParallelFor * [spar::get_Num_Workers()]; + + for(int i = 0; i < spar::get_Num_Workers();i++) + { + combined_spar_pf[i] = new ff::ParallelFor (spar::get_Num_Workers2(),true); + } + spar_pf = new ff::ParallelFor (spar::get_Num_Workers2(),true); + if(argc < 2) + { + cout<<"Usage: ./bin video_dir"<(dWidth),static_cast < int >(dHeight)); + vector < int > topology; + topology.push_back(dWidth*dHeight); + topology.push_back(100); + topology.push_back(100); + topology.push_back(dWidth*dHeight); + double learningRate = 0.05; + double momentum = 1; + double bias = 1; + n = new NeuralNetwork (topology,RELU,SIGM,TANH,bias,learningRate,momentum); + oVideoWriter.open("output.mp4",cv::VideoWriter::fourcc('m','p','4','v'),fps,frameSize,false); + auto t_start = std::chrono::steady_clock::now(); + image_processing_spar_data_ToStream_spar0_call.video = video; + if(pipeline0.run_and_wait_end() < 0) + { + error("Running pipeline\n"); + exit(1); + } + auto t_end = std::chrono::steady_clock::now(); + std::cout<<"Time (ms): "<(t_end-t_start).count()<