Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed incorrect implementation of image/conv2 and dsp/conv, hardwired network for p_median3x3_f32 #100

Closed
wants to merge 9 commits into from
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ FUNCTION | NOTES
[p_max()](src/math/p_max.c) | finds max val
[p_min()](src/math/p_min.c) | finds min val
[p_mean()](src/math/p_mean.c) | mean operation
[p_median()](src/math/p_mean.c) | finds middle value
[p_median()](src/math/p_median.c) | finds middle value
[p_mode()](src/math/p_mode.c) | finds most common value
[p_mul()](src/math/p_mul.c) | multiplication
[p_popcount()](src/math/p_popcount.c) | count the number of bits set
Expand Down
7 changes: 4 additions & 3 deletions include/pal_image.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
* r : output matrix pointer
* rows : rows in input image
* columns : columns in input image (multiple of 4)
* msize : mask size (square)
* mrows : mask rows
* mcols : mask cols
* opt : options
* p : number of processors "parallelism"
* team : team to work with
Expand All @@ -18,8 +19,8 @@
*/

/*2d convolution */
void p_conv2d_f32(const float *x, float *m, float *r, int rows, int cols,
int msize, int p, p_team_t team);
void p_conv2d_f32(const float *x, const float *m, float *r, int rows, int cols,
int mrows, int mcols, int p, p_team_t team);

/*2d box (i.e mean) filter(3x3) */
void p_box3x3_f32(const float *x, float *r, int rows, int cols,
Expand Down
31 changes: 27 additions & 4 deletions src/dsp/p_conv.c
Original file line number Diff line number Diff line change
@@ -1,3 +1,22 @@
/*
Remove this comment after merge

// FreeMat
//--> x = [1, 2, 3, 4];
//--> h = [5,6,7];
//--> r = conv(x, h)
//r =
// 5 16 34 52 45 28

// result from pal :
float x[4] = {1,2,3,4};
float h[3] = {5,6,7} ;
float r[10] = {100,100,100,100,100,100, 100, 100,100, 100};
p_conv_f32( x, h , r ,4 ,3);
printf ("%f, %f, %f, %f, %f, %f, %f, %f", r[0], r[1], r[2], r[3], r[4], r[5], r[6], r[7]);
//100.000000, 105.000000, 116.000000, 134.000000, 152.000000, 145.000000, 128.000000, 100.000000
*/

#include <pal.h>

/**
Expand Down Expand Up @@ -27,11 +46,15 @@ void p_conv_f32(const float *x, const float *h, float *r,
{
const float *xc = x;
float *rx = r;
for ( int i = 0; i < nx; i++) {
int i,j ;
for ( i = 0; i < nx+nh-1; i++) { *(rx++) = 0; }
rx = r ;
for ( i = 0; i < nx; i++) {
float xv = *xc++;
rx++;
for (int j = 0; j < nh; j++) {
*(rx + j) += xv * *(h + j);

for (j = 0; j < nh; j++) {
*(rx + j) += xv * *(h + j);
}
rx++;
}
}
106 changes: 84 additions & 22 deletions src/image/p_conv2d.c
Original file line number Diff line number Diff line change
@@ -1,3 +1,66 @@
/*
Remove this comment after merge
// A= [17 24 1 8 15; 23 5 7 14 16; 4 6 13 20 22;10 12 19 21 3; 11 18 25 2 9]
// B=[1 3 1; 0 5 0; 2 1 2]
// E=[0 0 0; 0 1 0; 0 0 0]
// C2 = conv2(A,E,'valid') <-- works correctly since kernel is symmetric
// C1 = conv2(A,B,'valid') <-- Doesn't work correctly

// Response from pal:
// 155.0, 135.0, 200.0,
// 145.0, 190.0, 230.0,
// 185.0, 225.0, 270.0,

// Response from FreeMat:
// 120, 165, 205,
// 160, 200, 245,
// 190, 255, 235,

#define W 5
#define W2 (W-2)

int main(int argc, char *argv[])
{

int i, j;

// http://www.johnloomis.org/ece563/notes/filter/conv/convolution.html
float src[5*5] = {17,24,1,8,15,
23,5,7,14,16,
4,6,13,20,22,
10,12,19,21,3,
11,18,25,2,9 };

float kernel[3*3] = {
1,3,1,
0,5,0,
2,1,2 };

float dest[3*3];
p_conv2d_f32(src, kernel, dest, 5, 5, 3);

// src
for (i = 0; i < W; i++) {
for (j = 0; j < W; j++) {
printf("%.1f, ", src[i*W+j]);
}
printf("\n");
}

printf("\n");

// response
for (i = 0; i < W2; i++) {
for (j = 0; j < W2; j++) {
printf("%.1f, ", dest[i*W2+j]);
}
printf("\n");
}
}
*/



#include <pal.h>

/** Convolution on input image 'x' with a rectangular kernel 'm' of 'mrows' x 'mcols' elements.
Expand All @@ -12,7 +75,9 @@
*
* @param cols Number of columns in input image
*
* @param msize Size of convolution kernel
* @param mrows number of rows in convolution kernel
*
* @param mcols number of cols in convolution kernel
*
* @param p Number of processors to use (task parallelism)
*
Expand All @@ -22,37 +87,34 @@
*
*/

void p_conv2d_f32(const float *x, const float *m, float *r, int rows, int cols,
                  int mrows, int mcols, int p, p_team_t team)
{
    /*
     * "Valid" 2D convolution: only positions where the kernel lies fully
     * inside the image are computed, so the result written to 'r' has
     * (rows - mrows + 1) x (cols - mcols + 1) elements, stored row-major.
     *
     * The kernel is applied flipped in both directions (it is walked from
     * its last element back to its first while the image window is walked
     * forward). This is what distinguishes convolution from correlation
     * and matters for any kernel that is not point-symmetric.
     */
    int out_rows, out_cols;
    int i, j, ki, kj;
    const float *m_last;
    float *out = r;

    /* 'p' and 'team' are accepted for API compatibility but not used here. */
    (void)p;
    (void)team;

    /* An empty kernel has no last element to start from; nothing to do. */
    if (mrows <= 0 || mcols <= 0) {
        return;
    }

    /* If the kernel is larger than the image these are <= 0 and the loops
     * below simply do not execute. */
    out_rows = rows - mrows + 1;
    out_cols = cols - mcols + 1;

    m_last = m + (mrows * mcols) - 1;

    for (i = 0; i < out_rows; i++) {
        for (j = 0; j < out_cols; j++) {
            /* Top-left corner of the image window for output (i, j). */
            const float *px = x + i * cols + j;
            /* Kernel is consumed back-to-front (flipped). */
            const float *pm = m_last;
            float acc = 0.0f;

            for (ki = 0; ki < mrows; ki++) {
                for (kj = 0; kj < mcols; kj++) {
                    acc += px[kj] * (*pm--);
                }
                /* Next image row of the same window. */
                px += cols;
            }

            *out++ = acc;
        }
    }
}
89 changes: 62 additions & 27 deletions src/image/p_median3x3.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,45 @@
#include <pal.h>

/*
* The following routines have been built from knowledge gathered
* around the Web. I am not aware of any copyright problem with
* them, so use it as you want.
* N. Devillard - 1998
*/

typedef float pixelvalue;

/* Exchange two pixelvalue lvalues. Wrapped in do/while(0) so the macro
 * expands to exactly one statement wherever it is used. */
#define PIX_SWAP(a, b) \
    do { pixelvalue pix_tmp = (a); (a) = (b); (b) = pix_tmp; } while (0)

/* Order two pixelvalue lvalues so that (a) <= (b) afterwards. */
#define PIX_SORT(a, b) \
    do { if ((a) > (b)) PIX_SWAP((a), (b)); } while (0)

/*----------------------------------------------------------------------------
   Function :   opt_med9()
   In       :   pointer to an array of 9 pixelvalues
   Out      :   a pixelvalue
   Job      :   optimized search of the median of 9 pixelvalues
   Notice   :   in theory, cannot go faster without assumptions on the
                signal.
                Formula from:
                XILINX XCELL magazine, vol. 23 by John L. Smith

                The input array is *NOT* modified in the process: the
                compare/exchange network runs on a private copy.
                The returned value is the median (5th smallest) of the
                nine inputs.
 ---------------------------------------------------------------------------*/

pixelvalue opt_med9(const pixelvalue *pointer)
{
    pixelvalue p[9];
    int k;

    /* Copy element by element instead of calling memcpy(): the only header
     * visibly included by this file is <pal.h>, so memcpy() is not
     * guaranteed to be declared here. */
    for (k = 0; k < 9; k++) {
        p[k] = pointer[k];
    }

    /* Fixed 19-step compare/exchange network; the order of the steps is
     * significant and must not be changed. */
    PIX_SORT(p[1], p[2]); PIX_SORT(p[4], p[5]); PIX_SORT(p[7], p[8]);
    PIX_SORT(p[0], p[1]); PIX_SORT(p[3], p[4]); PIX_SORT(p[6], p[7]);
    PIX_SORT(p[1], p[2]); PIX_SORT(p[4], p[5]); PIX_SORT(p[7], p[8]);
    PIX_SORT(p[0], p[3]); PIX_SORT(p[5], p[8]); PIX_SORT(p[4], p[7]);
    PIX_SORT(p[3], p[6]); PIX_SORT(p[1], p[4]); PIX_SORT(p[2], p[5]);
    PIX_SORT(p[4], p[7]); PIX_SORT(p[4], p[2]); PIX_SORT(p[6], p[4]);
    PIX_SORT(p[4], p[2]);

    /* After the network, the median sits in the middle slot. */
    return p[4];
}

/*
* A median 3x3 filter.
*
Expand All @@ -19,45 +59,40 @@
*
*/

void p_median3x3_f32(const float *x, float *r, int rows, int cols,
                     int p, p_team_t team)
{
    /*
     * 3x3 median filter over the interior of a row-major rows x cols image.
     * One value is written to 'r' for every 3x3 window that fits inside the
     * image, i.e. (rows - 2) x (cols - 2) values in row-major order.
     *
     * The nine window samples are kept in 'window' as three groups of three
     * (one group per image column). Moving the window one pixel to the
     * right only brings in one new column, so only the oldest group is
     * overwritten; the order of samples inside 'window' does not matter for
     * a median.
     */
    float window[9];
    float *out = r;
    int i, j, slot;

    /* 'p' and 'team' are accepted for API compatibility but not used here. */
    (void)p;
    (void)team;

    /* No complete 3x3 window exists in a smaller image. */
    if (rows < 3 || cols < 3) {
        return;
    }

    for (i = 0; i < rows - 2; i++) {
        /* The three image rows covered by the windows of output row i. */
        const float *top = x + i * cols;
        const float *mid = top + cols;
        const float *bot = mid + cols;

        /* Preload image columns 0 and 1 into the first two groups. */
        window[0] = top[0];
        window[1] = mid[0];
        window[2] = bot[0];

        window[3] = top[1];
        window[4] = mid[1];
        window[5] = bot[1];

        /* The third group is filled by the first loop iteration. */
        slot = 6;

        for (j = 2; j < cols; j++) {
            /* Bring in image column j, replacing the oldest column. */
            window[slot]     = top[j];
            window[slot + 1] = mid[j];
            window[slot + 2] = bot[j];
            slot = (slot + 3) % 9;

            /* opt_med9() works on its own copy, so 'window' stays valid. */
            *out++ = opt_med9(window);
        }
    }
}
Loading