diff --git a/Makefile b/Makefile
index fa13f6f20ddd894e0d7188f8fb9250837e9aabcf..2c0eeec1fbc29428fb222aa758f0b37744a1de70 100644
--- a/Makefile
+++ b/Makefile
@@ -4,23 +4,30 @@ SPAR_DATA=/mnt/c/Users/loffjh/Documents/wsl/spar-dev-data/bin/spar
 
 DEFS =-fpermissive -std=c++11 -O3 -Isrc/denoiser
 
-FF_PATHS= -DBLOCKING_MODE -spar_ondemand -I/mnt/c/Users/loffjh/Documents/wsl/spar-dev-data/libraries
-FF_LIB=-pthread
+FF_PATHS=-I../spar-dev-data/libraries
+FF_LIB=-lpthread
 OPENCV_PATH= -I/home/juniorloff/libs/opencv/include -L/home/juniorloff/libs/opencv/lib -I/usr/include/opencv4
 OCV_FLAGS=`pkg-config --cflags --libs /home/juniorloff/libs/opencv/lib/pkgconfig/opencv4.pc` 
 OPENCV_LIB=-lopencv_core -lopencv_highgui -lopencv_imgproc -lopencv_imgcodecs -lopencv_videoio
+# Options shared by both SPar compiler invocations below; -spar_file must stay last so it directly precedes $<.
+SPAR_FLAGS= -DBLOCKING_MODE -spar_ordered -spar_ondemand -spar_file
 
-all: image_processing image_processing_spar image_processing_spar_data
+all: image_processing image_processing_spar image_processing_spar_data image_processing_spar_data_manual
 
 
 image_processing: src/image_processing.cpp
 	$(CPP) $(DEFS) $(OPENCV_PATH) $< -o $@ $(OPENCV_LIB) $(OCV_FLAGS)
 
 image_processing_spar: src/image_processing_spar.cpp
-	$(SPAR_CC) $(DEFS) $(OPENCV_PATH) -spar_file $< -o $@ -spar_print > spar_out.cpp $(OPENCV_LIB) $(OCV_FLAGS)
+	$(SPAR_CC) $(DEFS) $(OPENCV_PATH) $(SPAR_FLAGS) $< -o $@ $(OPENCV_LIB) $(OCV_FLAGS)
 
 image_processing_spar_data: src/image_processing_spar_data.cpp
-	$(SPAR_DATA) $(DEFS) $(OPENCV_PATH) -spar_file $< -o $@ $(OPENCV_LIB) $(OCV_FLAGS)
+	$(SPAR_DATA) $(DEFS) $(OPENCV_PATH) $(SPAR_FLAGS) $< -o $@ -spar_print > spar_out.cpp $(OPENCV_LIB) $(OCV_FLAGS)
+
+# Compiles src/spar_out.cpp with the plain C++ compiler plus the FF_PATHS include path and FF_LIB.
+# NOTE(review): the rule above redirects -spar_print to ./spar_out.cpp, not src/spar_out.cpp - confirm the copy into src/ is a deliberate manual step.
+image_processing_spar_data_manual: src/spar_out.cpp
+	$(CPP) $(DEFS) $(OPENCV_PATH) $(FF_PATHS) $< -o $@ $(OPENCV_LIB) $(FF_LIB) $(OCV_FLAGS)
 
 clean:
-	rm -rf image_processing image_processing_spar image_processing_spar_data *.txt
+	rm -rf image_processing image_processing_spar image_processing_spar_data image_processing_spar_data_manual *.txt
\ No newline at end of file
diff --git a/src/image_processing.cpp b/src/image_processing.cpp
index b79801bf2bed1dc75f1e76b8245b98a4a077f39a..cfe1fdd79eb519550565071eb902f470421715cf 100644
--- a/src/image_processing.cpp
+++ b/src/image_processing.cpp
@@ -11,6 +11,7 @@
 #include <ostream>
 #include <streambuf>
 #include <ctime> 
+#include <chrono>
 
 #include <opencv2/opencv.hpp>
 #include <opencv2/core.hpp>
@@ -47,6 +48,7 @@
 #include <gaussian/NoiserGaussian.hpp>
 
 using namespace std;
+using namespace chrono;
 using namespace cv;
 
 #define TANH 1
@@ -73,7 +75,8 @@ int main(int argc, char **argv) {
 
 	string video_dir = argv[1];
 
-	VideoCapture video(video_dir);
+	VideoCapture video;
+	video.open(video_dir);
 
 	if (!video.isOpened()) return -1;
 
@@ -101,6 +104,7 @@ int main(int argc, char **argv) {
 	// initialize the VideoWriter object 
 	oVideoWriter.open("output.mp4", cv::VideoWriter::fourcc('m','p','4','v'), fps, frameSize, false);
 	
+    auto t_start = std::chrono::steady_clock::now();
 	// Stream
 	while(1){
 		Mat frame;
@@ -533,6 +537,11 @@ int main(int argc, char **argv) {
 			oVideoWriter.write(frame_output);
 		}
 	}
+	
+    auto t_end = std::chrono::steady_clock::now();
+
+    std::cout << "Time (ms): " << std::chrono::duration_cast<std::chrono::milliseconds>(t_end - t_start).count() << std::endl;
+	
 	video.release();
   	oVideoWriter.release();
 
diff --git a/src/image_processing_spar.cpp b/src/image_processing_spar.cpp
index 535a793ddd4934094e3ebd96f5fb90b6225ab1c3..b02852ca3ae5c5324943f90e0e6995c4101eb468 100644
--- a/src/image_processing_spar.cpp
+++ b/src/image_processing_spar.cpp
@@ -11,6 +11,7 @@
 #include <ostream>
 #include <streambuf>
 #include <ctime> 
+#include <chrono>
 
 #include <opencv2/opencv.hpp>
 #include <opencv2/core.hpp>
@@ -47,6 +48,7 @@
 #include <gaussian/NoiserGaussian.hpp>
 
 using namespace std;
+using namespace chrono;
 using namespace cv;
 
 #define TANH 1
@@ -73,7 +75,8 @@ int main(int argc, char **argv) {
 
 	string video_dir = argv[1];
 
-	VideoCapture video(video_dir);
+	VideoCapture video;
+	video.open(video_dir);
 
 	if (!video.isOpened()) return -1;
 
@@ -101,6 +104,7 @@ int main(int argc, char **argv) {
 	// initialize the VideoWriter object 
 	oVideoWriter.open("output.mp4", cv::VideoWriter::fourcc('m','p','4','v'), fps, frameSize, false);
 	
+    auto t_start = std::chrono::steady_clock::now();
 	// Stream
 	[[spar::ToStream, spar::Input(video)]]
 	while(1){
@@ -349,7 +353,7 @@ int main(int argc, char **argv) {
 		}
 
 		// Stage
-		[[spar::Stage, spar::Input(frame), spar::Output(frame)]]
+		[[spar::Stage, spar::Input(frame), spar::Output(frame), spar::Replicate()]]
 		{
 			// Input
 			vector<unsigned char> vec_denoiser(dWidth*dHeight);
@@ -536,6 +540,11 @@ int main(int argc, char **argv) {
 			oVideoWriter.write(frame_output);
 		}
 	}
+	
+    auto t_end = std::chrono::steady_clock::now();
+
+    std::cout << "Time (ms): " << std::chrono::duration_cast<std::chrono::milliseconds>(t_end - t_start).count() << std::endl;
+
 	video.release();
   	oVideoWriter.release();
 
diff --git a/src/image_processing_spar_data.cpp b/src/image_processing_spar_data.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a4a5338f7881bacc374ad92fa917d42dd38cb16a
--- /dev/null
+++ b/src/image_processing_spar_data.cpp
@@ -0,0 +1,564 @@
+
+// Neural Network - https://github.com/ralampay/ann
+// Denoiser - https://github.com/fastflow/fastflow/tree/fully-c%2B%2B11/examples/denoiser
+// Sobel - https://soubhihadri.medium.com/image-processing-best-practices-c-part-2-c0988b2d3e0c
+
+
+#include <iostream>
+#include <vector>
+#include <cstdio>
+#include <fstream>
+#include <ostream>
+#include <streambuf>
+#include <ctime> 
+#include <chrono>
+
+#include <opencv2/opencv.hpp>
+#include <opencv2/core.hpp>
+#include <opencv2/videoio.hpp>
+#include <opencv2/highgui.hpp>
+#include <opencv2/imgproc.hpp>
+#include <opencv2/imgcodecs.hpp>
+
+#include "Matrix.cpp"
+#include "Neuron.cpp"
+#include "Layer.cpp"
+#include "utils/Math.cpp"
+#include "utils/Misc.cpp"
+#include "neural_network/NeuralNetwork.cpp"
+#include "neural_network/backPropagation.cpp"
+#include "neural_network/feedForward.cpp"
+#include "neural_network/setErrors.cpp"
+
+#include <Denoiser.hpp>
+#include <parameters.h>
+#include <utils.hpp>
+#include <Input.hpp>
+#include <Output.hpp>
+#include <ocv/BitmapInputOCV.hpp>
+#include <ocv/BitmapOutputOCV.hpp>
+#include <ocv/VideoInputOCV.hpp>
+#include <ocv/VideoOutputOCV.hpp>
+#include <ocv/CVImageViewer.hpp>
+#include <spd/DenoiserSPD.hpp>
+#include <spd/DetectorSPD.hpp>
+#include <spd/NoiserSPD.hpp>
+#include <gaussian/DenoiserGaussian.hpp>
+#include <gaussian/DetectorGaussian.hpp>
+#include <gaussian/NoiserGaussian.hpp>
+
+using namespace std;
+using namespace chrono;
+using namespace cv;
+
+#define TANH 1
+#define RELU 2
+#define SIGM 3
+
+//global variables
+VideoWriter oVideoWriter;
+int num_frames;
+int dWidth;
+int dHeight ; 
+int fps;
+int total_frames;
+NeuralNetwork * n;
+Size frameSize;
+
+int main(int argc, char **argv) {
+	// Per frame: grayscale conversion, 10 training steps of the global network on the frame, noisy-pixel
+	// restoration, 3x3 edge filter, then append to output.mp4. Returns 1 on bad usage, -1 if the video cannot be opened.
+	if(argc < 2) {
+		cout << "Usage: ./bin video_dir" << endl;
+		return 1;
+	}
+
+	string video_dir = argv[1];
+
+	VideoCapture video;
+	video.open(video_dir);
+
+	if (!video.isOpened()) return -1;
+
+	// initialize video variables
+	num_frames = 0;
+	dWidth = video.get(CAP_PROP_FRAME_WIDTH);
+	dHeight = video.get(CAP_PROP_FRAME_HEIGHT);
+	fps = video.get(CAP_PROP_FPS);
+	total_frames = video.get(CAP_PROP_FRAME_COUNT);
+	frameSize = Size(static_cast<int>(dWidth), static_cast<int>(dHeight));
+
+	// initialize neural network parameters
+	vector<int> topology;
+	topology.push_back(dWidth*dHeight);
+	topology.push_back(100);
+	topology.push_back(100);
+	topology.push_back(dWidth*dHeight);
+	double learningRate  = 0.05;
+	double momentum      = 1;
+	double bias          = 1;
+
+	// initialize neural network
+	n = new NeuralNetwork(topology, RELU, SIGM, TANH, bias, learningRate, momentum);
+
+	// initialize the VideoWriter object
+	oVideoWriter.open("output.mp4", cv::VideoWriter::fourcc('m','p','4','v'), fps, frameSize, false);
+
+	auto t_start = std::chrono::steady_clock::now();
+	// Stream
+	[[spar::ToStream, spar::Input(video)]]
+	while(1){
+		Mat frame;
+		video >> frame;
+
+		if (frame.empty())
+			break;
+
+		cvtColor(frame,frame,COLOR_RGB2GRAY);
+		frame.convertTo(frame, CV_64FC1);
+
+
+		// Stage
+		[[spar::Stage, spar::Input(frame), spar::Output(frame)]]
+		{
+			// Input
+			vector<double> vec_neural_network((dWidth*dHeight));
+			memcpy(vec_neural_network.data(), frame.data, dWidth*dHeight*sizeof(double));
+
+			// Computation
+			for (int step = 0; step < 10; step++) {
+				n->setCurrentInput(vec_neural_network);
+				n->setCurrentTarget(vec_neural_network);
+
+				// feedForward
+				Matrix *a;  // Matrix of neurons to the left
+				Matrix *b;  // Matrix of weights to the right of layer
+				Matrix *c;  // Matrix of neurons to the next layer
+
+				for(int i = 0; i < (n->topologySize - 1); i++) {
+					a = n->getNeuronMatrix(i);
+					b = n->getWeightMatrix(i);
+					c = new Matrix(
+						a->getNumRows(),
+						b->getNumCols(),
+						false
+						);
+					// For i != 0 the activated matrix replaces the one fetched above; delete that one first so it is not leaked (the deletes below show these matrices are caller-owned).
+					if(i != 0) {
+						delete a;
+						a = n->getActivatedNeuronMatrix(i);
+					}
+					// matrix mult
+					for(int i = 0; i < a->getNumRows(); i++) {
+						[[spar::Pure]]
+						for(int j = 0; j < b->getNumCols(); j++) {
+							for(int k = 0; k < b->getNumRows(); k++) {
+								double p      = a->getValue(i, k) * b->getValue(k, j);
+								double newVal = c->getValue(i, j) + p;
+								c->setValue(i, j, newVal);
+							}
+						}
+					}
+
+					for(int c_index = 0; c_index < c->getNumCols(); c_index++) {
+						n->setNeuronValue(i + 1, c_index, c->getValue(0, c_index) + n->bias);
+					}
+
+					delete a;
+					delete b;
+					delete c;
+				}
+
+
+				n->setErrors();
+				// backPropagation
+				vector<Matrix *> newWeights;
+				Matrix *deltaWeights;
+				Matrix *gradients;
+				Matrix *derivedValues;
+				Matrix *gradientsTransposed;
+				Matrix *zActivatedVals;
+				Matrix *tempNewWeights;
+				Matrix *pGradients;
+				Matrix *transposedPWeights;
+				Matrix *hiddenDerived;
+				Matrix *transposedHidden;
+
+				// PART 1: OUTPUT TO LAST HIDDEN LAYER
+				int indexOutputLayer  = n->topology.size() - 1;
+
+				gradients = new Matrix(
+								1,
+								n->topology.at(indexOutputLayer),
+								false
+							);
+
+				derivedValues = n->layers.at(indexOutputLayer)->matrixifyDerivedVals();
+
+				for(int i = 0; i < n->topology.at(indexOutputLayer); i++) {
+					[[spar::Pure]]
+					{
+						double e  = n->derivedErrors.at(i);
+						double y  = derivedValues->getValue(0, i);
+						double g  = e * y;
+						gradients->setValue(0, i, g);
+					}
+				}
+
+				// Gt * Z
+				gradientsTransposed = gradients->transpose();
+				zActivatedVals      = n->layers.at(indexOutputLayer - 1)->matrixifyActivatedVals();
+
+				deltaWeights  = new Matrix(
+									gradientsTransposed->getNumRows(),
+									zActivatedVals->getNumCols(),
+									false
+								);
+
+				// matrix mult
+				for(int i = 0; i < gradientsTransposed->getNumRows(); i++) {
+					[[spar::Pure]]
+					for(int j = 0; j < zActivatedVals->getNumCols(); j++) {
+						for(int k = 0; k < zActivatedVals->getNumRows(); k++) {
+							double p      = gradientsTransposed->getValue(i, k) * zActivatedVals->getValue(k, j);
+							double newVal = deltaWeights->getValue(i, j) + p;
+							deltaWeights->setValue(i, j, newVal);
+						}
+					}
+				}
+
+				// COMPUTE FOR NEW WEIGHTS (LAST HIDDEN <-> OUTPUT)
+				tempNewWeights  = new Matrix(
+									n->topology.at(indexOutputLayer - 1),
+									n->topology.at(indexOutputLayer),
+									false
+									);
+
+				for(int r = 0; r < n->topology.at(indexOutputLayer - 1); r++) {
+					[[spar::Pure]]
+					for(int c = 0; c < n->topology.at(indexOutputLayer); c++) {
+
+						double originalValue  = n->weightMatrices.at(indexOutputLayer - 1)->getValue(r, c);
+						double deltaValue     = deltaWeights->getValue(c, r); // deltaWeights is indexed (c, r) here, unlike the (r, c) lookup in PART 2
+
+						originalValue = n->momentum * originalValue;
+						deltaValue    = n->learningRate * deltaValue;
+
+						tempNewWeights->setValue(r, c, (originalValue - deltaValue));
+					}
+				}
+
+				newWeights.push_back(tempNewWeights);
+
+				delete gradientsTransposed;
+				delete zActivatedVals;
+				delete deltaWeights;
+				delete derivedValues;
+
+				// PART 2: LAST HIDDEN LAYER TO INPUT LAYER
+				for(int i = (indexOutputLayer - 1); i > 0; i--) {
+					pGradients = gradients;
+
+					transposedPWeights  = n->weightMatrices.at(i)->transpose();
+
+					gradients   = new Matrix(
+									pGradients->getNumRows(),
+									transposedPWeights->getNumCols(),
+									false
+								);
+
+					// matrix mult
+					for(int i = 0; i < pGradients->getNumRows(); i++) {
+						[[spar::Pure]]
+						for(int j = 0; j < transposedPWeights->getNumCols(); j++) {
+							for(int k = 0; k < transposedPWeights->getNumRows(); k++) {
+								double p      = pGradients->getValue(i, k) * transposedPWeights->getValue(k, j);
+								double newVal = gradients->getValue(i, j) + p;
+								gradients->setValue(i, j, newVal);
+							}
+						}
+					}
+
+
+					hiddenDerived       = n->layers.at(i)->matrixifyDerivedVals();
+
+					for(int colCounter = 0; colCounter < hiddenDerived->getNumCols(); colCounter++) {
+						[[spar::Pure]]
+						{
+							double  g = gradients->getValue(0, colCounter) * hiddenDerived->getValue(0, colCounter);
+							gradients->setValue(0, colCounter, g);
+						}
+					}
+
+					if(i == 1) {
+						zActivatedVals  = n->layers.at(0)->matrixifyVals();
+					} else {
+						zActivatedVals  = n->layers.at(i-1)->matrixifyActivatedVals();
+					}
+
+					transposedHidden  = zActivatedVals->transpose();
+
+					deltaWeights      = new Matrix(
+										transposedHidden->getNumRows(),
+										gradients->getNumCols(),
+										false
+										);
+
+					// matrix mult
+					for(int i = 0; i < transposedHidden->getNumRows(); i++) {
+						[[spar::Pure]]
+						for(int j = 0; j < gradients->getNumCols(); j++) {
+							for(int k = 0; k < gradients->getNumRows(); k++) {
+								double p      = transposedHidden->getValue(i, k) * gradients->getValue(k, j);
+								double newVal = deltaWeights->getValue(i, j) + p;
+								deltaWeights->setValue(i, j, newVal);
+							}
+						}
+					}
+					// update weights
+					tempNewWeights  = new Matrix(
+										n->weightMatrices.at(i - 1)->getNumRows(),
+										n->weightMatrices.at(i - 1)->getNumCols(),
+										false
+									);
+
+					for(int r = 0; r < tempNewWeights->getNumRows(); r++) {
+						[[spar::Pure]]
+						for(int c = 0; c < tempNewWeights->getNumCols(); c++) {
+							double originalValue  = n->weightMatrices.at(i - 1)->getValue(r, c);
+							double deltaValue     = deltaWeights->getValue(r, c);
+
+							originalValue = n->momentum * originalValue;
+							deltaValue    = n->learningRate * deltaValue;
+
+							tempNewWeights->setValue(r, c, (originalValue - deltaValue));
+						}
+					}
+
+					newWeights.push_back(tempNewWeights);
+
+					delete pGradients;
+					delete transposedPWeights;
+					delete hiddenDerived;
+					delete zActivatedVals;
+					delete transposedHidden;
+					delete deltaWeights;
+				}
+				delete gradients;
+
+				for(int i = 0; i < n->weightMatrices.size(); i++) {
+					delete n->weightMatrices[i];
+				}
+
+				n->weightMatrices.clear();
+
+				reverse(newWeights.begin(), newWeights.end()); // newWeights was filled from the output layer backwards
+
+				for(int i = 0; i < newWeights.size(); i++) {
+					n->weightMatrices.push_back(newWeights[i]);
+				}
+			}
+			// Output: copy the values into a named local first; if getVals() returns by value, .data() on the temporary would dangle before convertTo reads it.
+			auto nn_out = n->layers.at(n->layers.size()-1)->getVals();
+			frame = Mat(frameSize, CV_64FC1, nn_out.data());
+			frame.convertTo(frame, CV_8UC1);
+		}
+
+		// Stage
+		[[spar::Stage, spar::Input(frame), spar::Output(frame), spar::Replicate()]]
+		{
+			// Input
+			vector<unsigned char> vec_denoiser(dWidth*dHeight);
+			memcpy(vec_denoiser.data(), frame.data, dWidth*dHeight*sizeof(unsigned char));
+
+			// Computation
+			Detector * detector = new DetectorGaussian(dHeight, dWidth, false);
+			detector->init(vec_denoiser.data());
+
+			int * noisyMap = (int *) malloc(dHeight * dWidth * sizeof(int)); // pixel value where flagged noisy, -1 elsewhere
+			for (unsigned int ri = 0; ri < dHeight; ++ri){
+				for (unsigned int ci = 0, x = ri * dWidth; ci < dWidth; ++ci, ++x){
+					noisyMap[x] = (detector->isPixelNoisy(vec_denoiser.data(), vec_denoiser.at(x), ri, ci)) ? vec_denoiser.at(x) : -1;
+
+
+				}
+			}
+
+			//array of noisy pixels
+			unsigned int n_noisy = 0;
+			for (unsigned int ri = 0; ri < dHeight; ++ri)
+				for (unsigned int ci = 0, x = ri * dWidth; ci < dWidth; ++ci, ++x)
+					if (noisyMap[x] >= 0)
+						++n_noisy;
+			unsigned int * noisyPixels = (unsigned int *) malloc(n_noisy * sizeof(unsigned int));
+			for (unsigned int i = 0, ri = 0; ri < dHeight; ++ri)
+				for (unsigned int ci = 0, x = ri * dWidth; ci < dWidth; ++ci, ++x)
+					if (noisyMap[x] >= 0)
+						noisyPixels[i++] = x;
+
+			unsigned char * restore_denoiser = (unsigned char *) malloc(dHeight * dWidth * sizeof(unsigned char));
+			unsigned char * diff = (unsigned char *) malloc(n_noisy * sizeof(unsigned char));
+
+			memset(diff, 0, n_noisy * sizeof(unsigned char));
+			memcpy(restore_denoiser, vec_denoiser.data(), dHeight * dWidth * sizeof(unsigned char));
+
+			float *residuals = (float *) malloc(n_noisy * sizeof(float));
+			bool fixed = false;
+			unsigned int restore_cycles = 0;
+			float alfa = 1.3;
+			float beta = 5;
+			bool fixed_cycles = false;
+			unsigned int max_cycles = 200;
+			while (true) {
+				//restore
+				for (unsigned int i = 0; i < n_noisy; ++i) {
+					unsigned int x = noisyPixels[i];
+					int idx = x;
+
+					//get the pixel and the 8 closest
+					unsigned char pixel = vec_denoiser.at(idx);
+					//up
+					int idx_neighbor = idx - dWidth * (idx >= dWidth);
+					unsigned char up_val = vec_denoiser.at(idx_neighbor);
+					unsigned char up_noisy = (noisyMap[idx_neighbor] >= 0);
+					//down
+					idx_neighbor = idx + dWidth * (idx < ((dHeight - 1) * dWidth));
+					unsigned char down_val = vec_denoiser.at(idx_neighbor);
+					unsigned char down_noisy = (noisyMap[idx_neighbor] >= 0);
+					//left
+					idx_neighbor = idx - ((idx % dWidth) > 0);
+					unsigned char left_val = vec_denoiser.at(idx_neighbor);
+					unsigned char left_noisy = (noisyMap[idx_neighbor] >= 0);
+					//right
+					idx_neighbor = idx + ((idx % dWidth) < (dWidth - 1));
+					unsigned char right_val = vec_denoiser.at(idx_neighbor);
+					unsigned char right_noisy = (noisyMap[idx_neighbor] >= 0);
+					//up-left  NOTE(review): the four diagonal offsets use a bare +/-1 with no column clamp (unlike left/right); idx 0 yields -1 here - confirm border pixels are never flagged noisy
+					idx_neighbor = idx - 1 - dWidth * (idx >= dWidth);
+					unsigned char upl_val = vec_denoiser.at(idx_neighbor);
+					unsigned char upl_noisy = (noisyMap[idx_neighbor] >= 0);
+					//up-right
+					idx_neighbor = idx + 1 - dWidth * (idx >= dWidth);
+					unsigned char upr_val = vec_denoiser.at(idx_neighbor);
+					unsigned char upr_noisy = (noisyMap[idx_neighbor] >= 0);
+					//down-left
+					idx_neighbor = idx - 1 + dWidth * (idx < ((dHeight - 1) * dWidth));
+					unsigned char downl_val = vec_denoiser.at(idx_neighbor);
+					unsigned char downl_noisy = (noisyMap[idx_neighbor] >= 0);
+					//down-right
+					idx_neighbor = idx + 1 + dWidth * (idx < ((dHeight - 1) * dWidth));
+					unsigned char downr_val = vec_denoiser.at(idx_neighbor);
+					unsigned char downr_noisy = (noisyMap[idx_neighbor] >= 0);
+
+					//compute the correction
+					unsigned char u = 0;
+					float S;
+					float Fu, u_min = 0.0f, Fu_prec = FLT_MAX; // 256.0f;
+					float beta_ = beta; // / 2;
+					for (int uu = 0; uu < 256; ++uu) {
+						u = (unsigned char) uu;
+						Fu = 0.0f;
+						S = 0.0f;
+						S += (float) (2 - up_noisy) * sqrt(_ABS((int) u - (int) up_val) * _ABS((int) u - (int) up_val) + alfa);
+						S += (float) (2 - down_noisy) * sqrt(_ABS(((int) u - (int) down_val)) * _ABS(((int) u - (int) down_val)) + alfa);
+						S += (float) (2 - left_noisy) * sqrt(_ABS(((int) u - (int) left_val)) * _ABS(((int) u - (int) left_val)) + alfa);
+						S += (float) (2 - right_noisy) * sqrt(_ABS(((int) u - (int) right_val)) * _ABS(((int) u - (int) right_val)) + alfa);
+						S += (float) (2 - upl_noisy) * sqrt(_ABS((int) u - (int) upl_val) * _ABS((int) u - (int) upl_val) + alfa);
+						S += (float) (2 - upr_noisy) * sqrt(_ABS((int) u - (int) upr_val) * _ABS((int) u - (int) upr_val) + alfa);
+						S += (float) (2 - downl_noisy) * sqrt(_ABS(((int) u - (int) downl_val)) * _ABS(((int) u - (int) downl_val)) + alfa);
+						S += (float) (2 - downr_noisy) * sqrt(_ABS(((int) u - (int) downr_val)) * _ABS(((int) u - (int) downr_val)) + alfa);
+
+						Fu = ((float) _ABS(u - pixel) + (beta_ * S));
+						if (Fu < Fu_prec) {
+							u_min = u;
+							Fu_prec = Fu;
+						}
+					}
+					restore_denoiser[x] = (unsigned char) (u_min + 0.5f); //round
+
+					unsigned char newdiff = (unsigned char) (_ABS((int) (restore_denoiser[x]) - noisyMap[x]));
+					residuals[i] = (float) (_ABS((int) newdiff - (int) (diff[i])));
+					diff[i] = newdiff;
+				}
+				//reduce residuals
+				float residual = 0.0f;
+				for (unsigned int i = 0; i < n_noisy; ++i)
+					residual += residuals[i];
+				residual /= n_noisy;
+				++restore_cycles;
+				//check convergence
+				if (fixed_cycles)
+					fixed = restore_cycles == max_cycles;
+				else
+					fixed = residual < RESIDUAL_THRESHOLD || restore_cycles >= max_cycles;
+				if (fixed)
+					break;
+			}
+
+			//clean-up
+			free(noisyPixels);
+			free(diff);
+			free(residuals);
+			free(noisyMap);
+			delete detector;
+			// Output: cv::Mat does not take ownership of an external buffer, so deep-copy it and release the malloc'd one (it was leaked once per frame before).
+			frame = Mat(frameSize, CV_8UC1, restore_denoiser).clone();
+			free(restore_denoiser);
+		}
+		// Stage (not replicated: it increments the global num_frames and writes to the shared oVideoWriter without any synchronisation)
+		[[spar::Stage, spar::Input(frame)]]
+		{
+
+			// Input
+			Mat frame_output = Mat(frameSize, CV_8UC1);
+
+			// Computation
+			{
+				std::vector<int> kernel_x({1,2,1,0,0,0,-1,-2,-1});
+				std::vector<int> kernel_y({1,0,-1,2,0,-2,1,0,-1});
+				int kernel_size = 3;
+
+				unsigned char *data_in = (unsigned char*)(frame.data);
+				unsigned char *data_out = (unsigned char*)(frame_output.data);
+
+				for (int row = 0; row < dHeight; ++row) {
+					for (int col = 0; col < dWidth ; col += 1) {
+
+						if (row <= kernel_size/2 || row >= dHeight-kernel_size/2 ||
+							col <= kernel_size/2 || col >= dWidth-kernel_size/2){
+							data_out[frame_output.step*row+col] = 0;
+							continue;
+						}
+
+						int sum_x = 0, sum_y = 0;
+						int k_ind = 0;
+						for (int k_row = -kernel_size/2; k_row <= kernel_size/2; ++k_row) {
+							for (int k_col = -kernel_size/2; k_col <= kernel_size/2; ++k_col) {
+								sum_x += kernel_x[k_ind]*data_in[frame.step*(row+k_row)+col+k_col];
+								sum_y += kernel_y[k_ind]*data_in[frame.step*(row+k_row)+col+k_col];
+								k_ind++;
+							}
+						}
+						int G = unsigned(std::sqrt(sum_x*sum_x+sum_y*sum_y));
+						data_out[frame_output.step*row+col] = std::min(G,255);
+					}
+				}
+			}
+
+			// Output
+			frame_output.convertTo(frame_output, CV_8UC1);
+
+			num_frames++;
+			oVideoWriter.write(frame_output);
+		}
+	}
+
+	auto t_end = std::chrono::steady_clock::now();
+
+	std::cout << "Time (ms): " << std::chrono::duration_cast<std::chrono::milliseconds>(t_end - t_start).count() << std::endl;
+
+	video.release();
+	oVideoWriter.release();
+
+	return 0;
+}
\ No newline at end of file
diff --git a/src/spar_out.cpp b/src/spar_out.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..4358307cea2993b156b9e385458c94cd9c7e703e
--- /dev/null
+++ b/src/spar_out.cpp
@@ -0,0 +1,610 @@
+
+#include <iostream>
+ 
+#include <vector>
+ 
+#include <cstdio>
+ 
+#include <fstream>
+ 
+#include <ostream>
+ 
+#include <streambuf>
+ 
+#include <ctime> 
+ 
+#include <chrono>
+ 
+#include <opencv2/opencv.hpp>
+ 
+#include <opencv2/core.hpp>
+ 
+#include <opencv2/videoio.hpp>
+ 
+#include <opencv2/highgui.hpp>
+ 
+#include <opencv2/imgproc.hpp>
+ 
+#include <opencv2/imgcodecs.hpp>
+ 
+#include "Matrix.cpp"
+ 
+#include "Neuron.cpp"
+ 
+#include "Layer.cpp"
+ 
+#include "utils/Math.cpp"
+ 
+#include "utils/Misc.cpp"
+ 
+#include "neural_network/NeuralNetwork.cpp"
+ 
+#include "neural_network/backPropagation.cpp"
+ 
+#include "neural_network/feedForward.cpp"
+ 
+#include "neural_network/setErrors.cpp"
+ 
+#include <Denoiser.hpp>
+ 
+#include <parameters.h>
+ 
+#include <utils.hpp>
+ 
+#include <Input.hpp>
+ 
+#include <Output.hpp>
+ 
+#include <ocv/BitmapInputOCV.hpp>
+ 
+#include <ocv/BitmapOutputOCV.hpp>
+ 
+#include <ocv/VideoInputOCV.hpp>
+ 
+#include <ocv/VideoOutputOCV.hpp>
+ 
+#include <ocv/CVImageViewer.hpp>
+ 
+#include <spd/DenoiserSPD.hpp>
+ 
+#include <spd/DetectorSPD.hpp>
+ 
+#include <spd/NoiserSPD.hpp>
+ 
+#include <gaussian/DenoiserGaussian.hpp>
+ 
+#include <gaussian/DetectorGaussian.hpp>
+ 
+#include <gaussian/NoiserGaussian.hpp>
+ 
+using namespace std; 
+using namespace chrono; 
+using namespace cv; 
+#define TANH 1
+ 
+#define RELU 2
+ 
+#define SIGM 3
+ 
+VideoWriter oVideoWriter; 
+int num_frames; 
+int dWidth; 
+int dHeight; 
+int fps; 
+int total_frames; 
+NeuralNetwork * n; 
+Size frameSize; 
+#include "ff/ff.hpp"
+ 
+#include "ff/pipeline.hpp"
+ 
+#include "ff/farm.hpp"
+ 
+#include "ff/parallel_for.hpp"
+ 
+using namespace ff; 
+namespace spar{
+	// Number of "processor" lines in /proc/cpuinfo, counted by a shell pipeline.
+	// Falls back to 1 when the pipeline cannot be started or prints no positive number.
+	static inline ssize_t get_mac_core() {
+		long n = 1;
+		FILE * f = popen("cat /proc/cpuinfo |grep processor | wc -l","r");
+		if(f == NULL)
+			return 1;
+		if(fscanf(f,"%ld",& n) != 1 || n < 1)
+			n = 1;
+		pclose (f);
+		return n;
+	}
+	// Reads $SPAR_NUM_WORKERS (level 1) or $SPAR_NUM_WORKERS2 (level 2) through the shell.
+	// Returns 1 for any other level, when popen fails, or when the variable holds no number.
+	static inline ssize_t get_env_num_workers(int level) {
+		long n = 1;
+		const char * cmd = NULL;
+		if(level == 1) cmd = "echo $SPAR_NUM_WORKERS";
+		if(level == 2) cmd = "echo $SPAR_NUM_WORKERS2";
+		FILE * f = (cmd != NULL) ? popen(cmd,"r") : NULL;
+		if(f == NULL)
+			return n;
+		if(fscanf(f,"%ld",& n) != 1)
+			n = 1;
+		pclose (f);
+		return n;
+	}
+	// Worker count for level 1: the environment value when it is positive,
+	// otherwise the processor count from get_mac_core().
+	static inline ssize_t get_Num_Workers() {
+		ssize_t w_size = get_env_num_workers(1);
+		if(w_size > 0)
+			return w_size;
+		return get_mac_core();
+	}
+	// Worker count for level 2: the environment value when it is positive,
+	// otherwise the processor count from get_mac_core().
+	static inline ssize_t get_Num_Workers2() {
+		ssize_t w_size = get_env_num_workers(2);
+		if(w_size > 0)
+			return w_size;
+		return get_mac_core();
+	}
+}
+// Item type exchanged by the generated pipeline stage nodes; it carries a single frame.
+struct image_processing_spar_data_struct_spar0{
+	Mat frame;
+	image_processing_spar_data_struct_spar0() {
+	}
+	image_processing_spar_data_struct_spar0(Mat frame) : frame(frame) {
+	}
+};
+ff::ParallelFor * * combined_spar_pf; 
+ff::ParallelFor * spar_pf; 
+struct image_processing_spar_data_Stage_spar00 : ff_node_t < image_processing_spar_data_struct_spar0 >{
+	Mat frame; 
+	image_processing_spar_data_struct_spar0 * svc(image_processing_spar_data_struct_spar0 * image_processing_spar_data_Input_spar) {
+		{
+			vector < double > vec_neural_network ((dWidth*dHeight)); 
+			memcpy(vec_neural_network.data(),image_processing_spar_data_Input_spar -> frame.data,dWidth*dHeight*sizeof(double)); 
+			
+			for(int step = 0; step < 10;step++)
+			{
+				n -> setCurrentInput(vec_neural_network); 
+				n -> setCurrentTarget(vec_neural_network); 
+				Matrix * a; 
+				Matrix * b; 
+				Matrix * c; 
+				
+				for(int i = 0; i < (n -> topologySize-1);i++)
+				{
+					a = n -> getNeuronMatrix(i); 
+					b = n -> getWeightMatrix(i); 
+					c = new Matrix (a -> getNumRows(),b -> getNumCols(),false); 
+					if(i != 0)
+					{
+						a = n -> getActivatedNeuronMatrix(i);
+					} 
+					combined_spar_pf[0] -> parallel_for(0,a -> getNumRows()+0,1,[&] (int i) {
+							
+							for(int j = 0; j < b -> getNumCols();j++)
+							{
+								
+								for(int k = 0; k < b -> getNumRows();k++)
+								{
+									double p = a -> getValue(i,k)*b -> getValue(k,j); 
+									double newVal = c -> getValue(i,j)+p; 
+									c -> setValue(i,j,newVal);
+								}
+							}
+						}); 
+					
+					for(int c_index = 0; c_index < c -> getNumCols();c_index++)
+					{
+						n -> setNeuronValue(i+1,c_index,c -> getValue(0,c_index)+n -> bias);
+					} 
+					delete a; 
+					delete b; 
+					delete c;
+				} 
+				n -> setErrors(); 
+				vector < Matrix * > newWeights; 
+				Matrix * deltaWeights; 
+				Matrix * gradients; 
+				Matrix * derivedValues; 
+				Matrix * gradientsTransposed; 
+				Matrix * zActivatedVals; 
+				Matrix * tempNewWeights; 
+				Matrix * pGradients; 
+				Matrix * transposedPWeights; 
+				Matrix * hiddenDerived; 
+				Matrix * transposedHidden; 
+				int indexOutputLayer = n -> topology.size()-1; 
+				gradients = new Matrix (1,n -> topology.at(indexOutputLayer),false); 
+				derivedValues = n -> layers.at(indexOutputLayer) -> matrixifyDerivedVals(); 
+				combined_spar_pf[0] -> parallel_for(0,n -> topology.at(indexOutputLayer)+0,1,[&] (int i) {
+						{
+							double e = n -> derivedErrors.at(i); 
+							double y = derivedValues -> getValue(0,i); 
+							double g = e*y; 
+							gradients -> setValue(0,i,g);
+						}
+					}); 
+				gradientsTransposed = gradients -> transpose(); 
+				zActivatedVals = n -> layers.at(indexOutputLayer-1) -> matrixifyActivatedVals(); 
+				deltaWeights = new Matrix (gradientsTransposed -> getNumRows(),zActivatedVals -> getNumCols(),false); 
+				combined_spar_pf[0] -> parallel_for(0,gradientsTransposed -> getNumRows()+0,1,[&] (int i) {
+						
+						for(int j = 0; j < zActivatedVals -> getNumCols();j++)
+						{
+							
+							for(int k = 0; k < zActivatedVals -> getNumRows();k++)
+							{
+								double p = gradientsTransposed -> getValue(i,k)*zActivatedVals -> getValue(k,j); 
+								double newVal = deltaWeights -> getValue(i,j)+p; 
+								deltaWeights -> setValue(i,j,newVal);
+							}
+						}
+					}); 
+				tempNewWeights = new Matrix (n -> topology.at(indexOutputLayer-1),n -> topology.at(indexOutputLayer),false); 
+				combined_spar_pf[0] -> parallel_for(0,n -> topology.at(indexOutputLayer-1)+0,1,[&] (int r) {
+						
+						for(int c = 0; c < n -> topology.at(indexOutputLayer);c++)
+						{
+							double originalValue = n -> weightMatrices.at(indexOutputLayer-1) -> getValue(r,c); 
+							double deltaValue = deltaWeights -> getValue(c,r); 
+							originalValue = n -> momentum*originalValue; 
+							deltaValue = n -> learningRate*deltaValue; 
+							tempNewWeights -> setValue(r,c,(originalValue-deltaValue));
+						}
+					}); 
+				newWeights.push_back(tempNewWeights); 
+				delete gradientsTransposed; 
+				delete zActivatedVals; 
+				delete deltaWeights; 
+				delete derivedValues; 
+				
+				for(int i = (indexOutputLayer-1); i > 0;i--)
+				{
+					pGradients = gradients; 
+					transposedPWeights = n -> weightMatrices.at(i) -> transpose(); 
+					gradients = new Matrix (pGradients -> getNumRows(),transposedPWeights -> getNumCols(),false); 
+					combined_spar_pf[0] -> parallel_for(0,pGradients -> getNumRows()+0,1,[&] (int i) {
+							
+							for(int j = 0; j < transposedPWeights -> getNumCols();j++)
+							{
+								
+								for(int k = 0; k < transposedPWeights -> getNumRows();k++)
+								{
+									double p = pGradients -> getValue(i,k)*transposedPWeights -> getValue(k,j); 
+									double newVal = gradients -> getValue(i,j)+p; 
+									gradients -> setValue(i,j,newVal);
+								}
+							}
+						}); 
+					hiddenDerived = n -> layers.at(i) -> matrixifyDerivedVals(); 
+					combined_spar_pf[0] -> parallel_for(0,hiddenDerived -> getNumCols()+0,1,[&] (int colCounter) {
+							{
+								double g = gradients -> getValue(0,colCounter)*hiddenDerived -> getValue(0,colCounter); 
+								gradients -> setValue(0,colCounter,g);
+							}
+						}); 
+					if(i == 1)
+					{
+						zActivatedVals = n -> layers.at(0) -> matrixifyVals();
+					} else 
+					{
+						zActivatedVals = n -> layers.at(i-1) -> matrixifyActivatedVals();
+					} 
+					transposedHidden = zActivatedVals -> transpose(); 
+					deltaWeights = new Matrix (transposedHidden -> getNumRows(),gradients -> getNumCols(),false); 
+					combined_spar_pf[0] -> parallel_for(0,transposedHidden -> getNumRows()+0,1,[&] (int i) {
+							
+							for(int j = 0; j < gradients -> getNumCols();j++)
+							{
+								
+								for(int k = 0; k < gradients -> getNumRows();k++)
+								{
+									double p = transposedHidden -> getValue(i,k)*gradients -> getValue(k,j); 
+									double newVal = deltaWeights -> getValue(i,j)+p; 
+									deltaWeights -> setValue(i,j,newVal);
+								}
+							}
+						}); 
+					tempNewWeights = new Matrix (n -> weightMatrices.at(i-1) -> getNumRows(),n -> weightMatrices.at(i-1) -> getNumCols(),false); 
+					combined_spar_pf[0] -> parallel_for(0,tempNewWeights -> getNumRows()+0,1,[&] (int r) {
+							
+							for(int c = 0; c < tempNewWeights -> getNumCols();c++)
+							{
+								double originalValue = n -> weightMatrices.at(i-1) -> getValue(r,c); 
+								double deltaValue = deltaWeights -> getValue(r,c); 
+								originalValue = n -> momentum*originalValue; 
+								deltaValue = n -> learningRate*deltaValue; 
+								tempNewWeights -> setValue(r,c,(originalValue-deltaValue));
+							}
+						}); 
+					newWeights.push_back(tempNewWeights); 
+					delete pGradients; 
+					delete transposedPWeights; 
+					delete hiddenDerived; 
+					delete zActivatedVals; 
+					delete transposedHidden; 
+					delete deltaWeights;
+				} 
+				delete gradients; 
+				
+				for(int i = 0; i < n -> weightMatrices.size();i++)
+				{
+					delete n -> weightMatrices[i];
+				} 
+				n -> weightMatrices.clear(); 
+				reverse(newWeights.begin(),newWeights.end()); 
+				
+				for(int i = 0; i < newWeights.size();i++)
+				{
+					n -> weightMatrices.push_back(newWeights[i]);
+				}
+			} 
+			image_processing_spar_data_Input_spar -> frame = Mat(frameSize,CV_64FC1,n -> layers.at(n -> layers.size()-1) -> getVals().data()); 
+			image_processing_spar_data_Input_spar -> frame.convertTo(image_processing_spar_data_Input_spar -> frame,CV_8UC1);
+		} 
+		ff_send_out (image_processing_spar_data_Input_spar); 
+		return (image_processing_spar_data_struct_spar0 *)GO_ON;
+	}
+}; 
+// Denoising stage: each pixel that DetectorGaussian flags as noisy is replaced by the grey level that minimises a neighbourhood cost.
+struct image_processing_spar_data_Stage_spar01 : ff_node_t < image_processing_spar_data_struct_spar0 >{
+	Mat frame; 
+	// Reads the item's frame as dWidth*dHeight contiguous 8-bit pixels, stores the restored image back into it and forwards the item.
+	image_processing_spar_data_struct_spar0 * svc(image_processing_spar_data_struct_spar0 * image_processing_spar_data_Input_spar) {
+		{
+			vector < unsigned char > vec_denoiser(dWidth * dHeight); 
+			memcpy(vec_denoiser.data(),image_processing_spar_data_Input_spar -> frame.data,dWidth*dHeight*sizeof(unsigned char)); 
+			Detector * detector = new DetectorGaussian (dHeight,dWidth,false); 
+			detector -> init(vec_denoiser.data()); 
+			int * noisyMap = (int *)malloc(dHeight*dWidth*sizeof(int)); // original value of a noisy pixel, -1 for a clean one
+			for(unsigned int ri = 0; ri < dHeight;++ri)
+			{
+				for(unsigned int ci = 0,x = ri*dWidth; ci < dWidth;++ci,++x)
+				{
+					noisyMap[x] = (detector -> isPixelNoisy(vec_denoiser.data(),vec_denoiser.at(x),ri,ci)) ? vec_denoiser.at(x) : - 1;
+				}
+			} 
+			unsigned int n_noisy = 0; // number of pixels flagged as noisy
+			for(unsigned int ri = 0; ri < dHeight;++ri)
+			for(unsigned int ci = 0,x = ri*dWidth; ci < dWidth;++ci,++x)
+			if(noisyMap[x] >= 0)
+			++n_noisy; 
+			unsigned int * noisyPixels = (unsigned int *)malloc(n_noisy*sizeof(unsigned int)); // linear indices of the noisy pixels, in scan order
+			for(unsigned int i = 0,ri = 0; ri < dHeight;++ri)
+			for(unsigned int ci = 0,x = ri*dWidth; ci < dWidth;++ci,++x)
+			if(noisyMap[x] >= 0)
+			noisyPixels[i++] = x; 
+			unsigned char * restore_denoiser = (unsigned char *)malloc(dHeight*dWidth*sizeof(unsigned char)); // output image, starts as a copy of the input
+			unsigned char * diff = (unsigned char *)malloc(n_noisy*sizeof(unsigned char)); // |restored - original| per noisy pixel, from the previous cycle
+			memset(diff,0,n_noisy*sizeof(unsigned char)); 
+			memcpy(restore_denoiser,vec_denoiser.data(),dHeight*dWidth*sizeof(unsigned char)); 
+			float * residuals = (float *)malloc(n_noisy*sizeof(float)); // change of diff between two consecutive cycles
+			bool fixed = false; 
+			unsigned int restore_cycles = 0; 
+			float alfa = 1.3; 
+			float beta = 5; 
+			bool fixed_cycles = false; 
+			unsigned int max_cycles = 200; 
+			while(true)
+			{
+				for(unsigned int i = 0; i < n_noisy;++i)
+				{
+					unsigned int x = noisyPixels[i]; 
+					int idx = x; 
+					unsigned char pixel = vec_denoiser.at(idx); 
+					// Offsets towards each side, zero on the image border, so every neighbour (diagonals included) is clamped to the frame.
+					int off_up = dWidth*(idx >= dWidth); 
+					int off_down = dWidth*(idx < ((dHeight-1)*dWidth)); 
+					int off_left = ((idx%dWidth) > 0); 
+					int off_right = ((idx%dWidth) < (dWidth-1)); 
+					int idx_neighbor = idx-off_up; 
+					unsigned char up_val = vec_denoiser.at(idx_neighbor); 
+					unsigned char up_noisy = (noisyMap[idx_neighbor] >= 0); 
+					idx_neighbor = idx+off_down; 
+					unsigned char down_val = vec_denoiser.at(idx_neighbor); 
+					unsigned char down_noisy = (noisyMap[idx_neighbor] >= 0); 
+					idx_neighbor = idx-off_left; 
+					unsigned char left_val = vec_denoiser.at(idx_neighbor); 
+					unsigned char left_noisy = (noisyMap[idx_neighbor] >= 0); 
+					idx_neighbor = idx+off_right; 
+					unsigned char right_val = vec_denoiser.at(idx_neighbor); 
+					unsigned char right_noisy = (noisyMap[idx_neighbor] >= 0); 
+					idx_neighbor = idx-off_left-off_up; 
+					unsigned char upl_val = vec_denoiser.at(idx_neighbor); 
+					unsigned char upl_noisy = (noisyMap[idx_neighbor] >= 0); 
+					idx_neighbor = idx+off_right-off_up; 
+					unsigned char upr_val = vec_denoiser.at(idx_neighbor); 
+					unsigned char upr_noisy = (noisyMap[idx_neighbor] >= 0); 
+					idx_neighbor = idx-off_left+off_down; 
+					unsigned char downl_val = vec_denoiser.at(idx_neighbor); 
+					unsigned char downl_noisy = (noisyMap[idx_neighbor] >= 0); 
+					idx_neighbor = idx+off_right+off_down; 
+					unsigned char downr_val = vec_denoiser.at(idx_neighbor); 
+					unsigned char downr_noisy = (noisyMap[idx_neighbor] >= 0); 
+					unsigned char u = 0; 
+					float S; 
+					float Fu,u_min = 0.0f,Fu_prec = FLT_MAX; 
+					float beta_ = beta; 
+					// Exhaustive search over all 256 grey levels for the one with the lowest cost Fu.
+					for(int uu = 0; uu < 256;++uu)
+					{
+						u = (unsigned char)uu; 
+						Fu = 0.0f; 
+						S = 0.0f; // neighbourhood term: a clean neighbour weighs 2, a noisy one weighs 1
+						S += (float)(2-up_noisy)*sqrt(_ABS((int)u-(int)up_val)*_ABS((int)u-(int)up_val)+alfa); 
+						S += (float)(2-down_noisy)*sqrt(_ABS(((int)u-(int)down_val))*_ABS(((int)u-(int)down_val))+alfa); 
+						S += (float)(2-left_noisy)*sqrt(_ABS(((int)u-(int)left_val))*_ABS(((int)u-(int)left_val))+alfa); 
+						S += (float)(2-right_noisy)*sqrt(_ABS(((int)u-(int)right_val))*_ABS(((int)u-(int)right_val))+alfa); 
+						S += (float)(2-upl_noisy)*sqrt(_ABS((int)u-(int)upl_val)*_ABS((int)u-(int)upl_val)+alfa); 
+						S += (float)(2-upr_noisy)*sqrt(_ABS((int)u-(int)upr_val)*_ABS((int)u-(int)upr_val)+alfa); 
+						S += (float)(2-downl_noisy)*sqrt(_ABS(((int)u-(int)downl_val))*_ABS(((int)u-(int)downl_val))+alfa); 
+						S += (float)(2-downr_noisy)*sqrt(_ABS(((int)u-(int)downr_val))*_ABS(((int)u-(int)downr_val))+alfa); 
+						Fu = ((float)_ABS(u-pixel)+(beta_*S)); // distance to the observed value plus the weighted neighbourhood term
+						if(Fu < Fu_prec)
+						{
+							u_min = u; 
+							Fu_prec = Fu;
+						}
+					} 
+					restore_denoiser[x] = (unsigned char)(u_min+0.5f); 
+					unsigned char newdiff = (unsigned char)(_ABS((int)(restore_denoiser [x])- noisyMap[x])); 
+					residuals[i] = (float)(_ABS((int)newdiff-(int)(diff[i]))); 
+					diff[i] = newdiff;
+				} 
+				float residual = 0.0f; // mean of residuals over the noisy pixels
+				for(unsigned int i = 0; i < n_noisy;++i)
+				residual += residuals[i]; 
+				residual /= n_noisy; 
+				++restore_cycles; 
+				if(fixed_cycles)
+				fixed = restore_cycles == max_cycles; else 
+				fixed = residual < RESIDUAL_THRESHOLD || restore_cycles >= max_cycles; // converged, or cycle cap reached
+				if(fixed)
+				break;
+			} 
+			free (noisyPixels); 
+			free (diff); 
+			free (residuals); 
+			free (noisyMap); 
+			delete detector; 
+			// A Mat built on a user-supplied buffer does not own it: deep-copy the result, then release the scratch buffer.
+			image_processing_spar_data_Input_spar -> frame = Mat(frameSize,CV_8UC1,restore_denoiser).clone(); 
+			free (restore_denoiser);
+		} 
+		ff_send_out (image_processing_spar_data_Input_spar); 
+		return (image_processing_spar_data_struct_spar0 *)GO_ON;
+	}
+}; 
+// Edge-detection stage: filters the frame with the two 3x3 Sobel kernels and writes the gradient magnitude to the output video.
+struct image_processing_spar_data_Stage_spar02 : ff_node_t < image_processing_spar_data_struct_spar0 >{
+	// Consumes and deletes the incoming item; nothing is forwarded downstream.
+	image_processing_spar_data_struct_spar0 * svc(image_processing_spar_data_struct_spar0 * image_processing_spar_data_Input_spar) {
+		const std::vector < int > gx_weights {1,2,1,0,0,0,- 1,- 2,- 1}; 
+		const std::vector < int > gy_weights {1,0,- 1,2,0,- 2,1,0,- 1}; 
+		const int ksize = 3; 
+		const int half = ksize / 2; 
+		Mat edges(frameSize,CV_8UC1); 
+		const Mat & src = image_processing_spar_data_Input_spar -> frame; 
+		const unsigned char * src_px = (unsigned char *)(src.data); 
+		unsigned char * dst_px = (unsigned char *)(edges.data); 
+		for(int row = 0; row < dHeight;row++)
+		{
+			for(int col = 0; col < dWidth;col++)
+			{
+				unsigned char & out_px = dst_px[edges.step*row+col]; 
+				// Only pixels strictly inside this margin are filtered; every other pixel is set to 0.
+				const bool interior = row > half && row < dHeight-half && col > half && col < dWidth-half; 
+				if(! interior)
+				{
+					out_px = 0; 
+					continue;
+				} 
+				int gx = 0; 
+				int gy = 0; 
+				int w = 0; // index into the row-major 3x3 kernels
+				for(int dr = - half; dr <= half;++dr)
+				{
+					for(int dc = - half; dc <= half;++dc,++w)
+					{
+						const int px = src_px[src.step*(row+dr)+col+dc]; 
+						gx += gx_weights[w]*px; 
+						gy += gy_weights[w]*px;
+					}
+				} 
+				// Gradient magnitude, truncated to an integer and saturated to the 8-bit range.
+				const int magnitude = unsigned(std::sqrt(gx*gx+gy*gy)); 
+				out_px = std::min(magnitude,255);
+			}
+		} 
+		edges.convertTo(edges,CV_8UC1); 
+		// NOTE(review): main creates several instances of this stage; num_frames and oVideoWriter are not members and are used here without a lock - confirm this is safe.
+		++num_frames; 
+		oVideoWriter.write(edges); 
+		delete image_processing_spar_data_Input_spar; 
+		return (image_processing_spar_data_struct_spar0 *)GO_ON;
+	}
+}; 
+// Stream source: reads the video frame by frame and emits one heap-allocated work item per frame.
+struct image_processing_spar_data_ToStream_spar0 : ff_node_t < image_processing_spar_data_struct_spar0 >{
+	VideoCapture video; 
+	// The argument is not used. Returns EOS as soon as the capture yields an empty frame.
+	image_processing_spar_data_struct_spar0 * svc(image_processing_spar_data_struct_spar0 * image_processing_spar_data_Input_spar) {
+		for(;;)
+		{
+			Mat decoded; 
+			video>>decoded; 
+			if(decoded.empty())
+			{
+				return EOS;
+			} 
+			// NOTE(review): frames read through VideoCapture are normally BGR-ordered, so COLOR_BGR2GRAY may be the intended code - confirm.
+			cvtColor(decoded,decoded,COLOR_RGB2GRAY); 
+			decoded.convertTo(decoded,CV_64FC1); 
+			ff_send_out (new image_processing_spar_data_struct_spar0 (decoded));
+		}
+	}
+}; 
+int main(int argc,char * * argv) {
+	image_processing_spar_data_ToStream_spar0 image_processing_spar_data_ToStream_spar0_call; 
+	image_processing_spar_data_Stage_spar00 image_processing_spar_data_Stage_spar00_call; 
+	std::vector < std::unique_ptr < ff_node > > image_processing_spar_data_Stage_spar01_workers (spar::get_Num_Workers()); 
+	
+	for(unsigned int image_processing_spar_data_Stage_spar01_i = 0; image_processing_spar_data_Stage_spar01_i < spar::get_Num_Workers();++image_processing_spar_data_Stage_spar01_i)
+	image_processing_spar_data_Stage_spar01_workers[image_processing_spar_data_Stage_spar01_i] = std::unique_ptr < ff_node >(new image_processing_spar_data_Stage_spar01); 
+	ff_OFarm < image_processing_spar_data_struct_spar0 > image_processing_spar_data_Stage_spar01_call (std::move(image_processing_spar_data_Stage_spar01_workers)); 
+	image_processing_spar_data_Stage_spar01_call.add_emitter(image_processing_spar_data_Stage_spar00_call); 
+	std::vector < std::unique_ptr < ff_node > > image_processing_spar_data_Stage_spar02_workers (spar::get_Num_Workers()); 
+	
+	for(unsigned int image_processing_spar_data_Stage_spar02_i = 0; image_processing_spar_data_Stage_spar02_i < spar::get_Num_Workers();++image_processing_spar_data_Stage_spar02_i)
+	image_processing_spar_data_Stage_spar02_workers[image_processing_spar_data_Stage_spar02_i] = std::unique_ptr < ff_node >(new image_processing_spar_data_Stage_spar02); 
+	ff_OFarm < image_processing_spar_data_struct_spar0 > image_processing_spar_data_Stage_spar02_call (std::move(image_processing_spar_data_Stage_spar02_workers)); 
+	ff_Pipe < image_processing_spar_data_struct_spar0 > pipeline0(image_processing_spar_data_ToStream_spar0_call,image_processing_spar_data_Stage_spar01_call,image_processing_spar_data_Stage_spar02_call); 
+	combined_spar_pf = new ff::ParallelFor * [spar::get_Num_Workers()]; 
+	
+	for(int i = 0; i < spar::get_Num_Workers();i++)
+	{
+		combined_spar_pf[i] = new ff::ParallelFor (spar::get_Num_Workers2(),true);
+	} 
+	spar_pf = new ff::ParallelFor (spar::get_Num_Workers2(),true); 
+	if(argc < 2)
+	{
+		cout<<"Usage: ./bin video_dir"<<endl; 
+		return 1;
+	} 
+	string video_dir = argv[1]; 
+	VideoCapture video; 
+	video.open(video_dir); 
+	if(! video.isOpened())
+	return - 1; 
+	num_frames = 0; 
+	dWidth = video.get(CAP_PROP_FRAME_WIDTH); 
+	dHeight = video.get(CAP_PROP_FRAME_HEIGHT); 
+	fps = video.get(CAP_PROP_FPS); 
+	total_frames = video.get(CAP_PROP_FRAME_COUNT); 
+	frameSize = Size(static_cast < int >(dWidth),static_cast < int >(dHeight)); 
+	vector < int > topology; 
+	topology.push_back(dWidth*dHeight); 
+	topology.push_back(100); 
+	topology.push_back(100); 
+	topology.push_back(dWidth*dHeight); 
+	double learningRate = 0.05; 
+	double momentum = 1; 
+	double bias = 1; 
+	n = new NeuralNetwork (topology,RELU,SIGM,TANH,bias,learningRate,momentum); 
+	oVideoWriter.open("output.mp4",cv::VideoWriter::fourcc('m','p','4','v'),fps,frameSize,false); 
+	auto t_start = std::chrono::steady_clock::now(); 
+	image_processing_spar_data_ToStream_spar0_call.video = video; 
+	if(pipeline0.run_and_wait_end() < 0)
+	{
+		error("Running pipeline\n"); 
+		exit(1);
+	} 
+	auto t_end = std::chrono::steady_clock::now(); 
+	std::cout<<"Time (ms): "<<std::chrono::duration_cast < std::chrono::milliseconds >(t_end-t_start).count()<<std::endl; 
+	video.release(); 
+	oVideoWriter.release(); 
+	return 0;
+}