A 1-D DCT is the multiplication of a constant 2-D coefficient matrix by an input vector (the input stream), so it can be implemented with a pipelined, parallel approach. Let's look at the parallelism first.
As the input X[i] is streamed and shifted through the pipeline, the constants, noted as W[i][j], must be read with a correspondingly shifted index j. So, let's find the rule for index j.
The index can be produced by using a counter as an address generator, or simply by re-aligning the starting address to follow the pipelining of X. The latter method is preferred, because it keeps the constant table easy to read in an external file that is included into the ROM table implementation.
The design can be divided into a repeated element, the Processing Element (PE), and all the elements together behave like a systolic array. Each PE is made of input latches for X and W and a MAC (Multiplier-Accumulator). The length of the PE pipeline can be varied; for this purpose, the design is parameterized.
The detailed internal architecture of the PE and its I/O timing are as follows. The pipeline has a latency of three clocks: two for the PE and one for the final streamed output.
Simulation result.
The testbench uses SystemC models to generate the input stimulus X and to check the SystemVerilog model's result against the C-model.
Codes:
C-model of DCT, simply nested loop.
-----------------
//
// File: dct.h
//
#ifndef DCT_H_INCLUDED
#define DCT_H_INCLUDED

/* Bit width of one sample; used as the sc_uint<DW> port width by the
   SystemC stimulus/checker modules. */
#define DW 16
/* Sample type used by the C reference model (inputs and outputs). */
typedef short dct_data_t;
#define DCT_SIZE 8 /* number of points of the 1-D transform (8x8 coefficient table) */
/* Right-shift applied to the accumulated products by dct_1d(). */
#define CONST_BITS 13
/* Rounding right shift: adds half of the discarded range (1 << (n-1)) before
   shifting x right by n bits.  Both arguments are fully parenthesized so that
   compound expressions (e.g. a conditional) can be passed safely. */
#define DESCALE(x,n) (((x) + (1 << ((n)-1))) >> (n))
/* 1-D DCT reference model: reads DCT_SIZE samples from src and writes
   DCT_SIZE descaled results to dst. */
void dct_1d(dct_data_t src[DCT_SIZE], dct_data_t dst[DCT_SIZE]);

#endif /* DCT_H_INCLUDED */
-----------------
//// File: dct.cpp
//
#include "dct.h"
// 1-D DCT reference model.
//
// For every output index, forms the dot product of the input samples with
// one row of the fixed-point coefficient table, then rounds and shifts the
// 32-bit sum right by CONST_BITS (see DESCALE) before storing it.
//
//   src : DCT_SIZE input samples
//   dst : DCT_SIZE output samples (overwritten)
void dct_1d(dct_data_t src[DCT_SIZE], dct_data_t dst[DCT_SIZE])
{
    // Coefficient table, one row per output sample.  The same values are
    // used by the RTL ROM (dct_ROM_Table.txt).
    static const dct_data_t kCoeff[DCT_SIZE][DCT_SIZE] = {
        {  8192,   8192,   8192,   8192,   8192,   8192,   8192,   8192 },
        { 11363,   9633,   6436,   2260,  -2260,  -6436,  -9632, -11362 },
        { 10703,   4433,  -4433, -10703, -10703,  -4433,   4433,  10703 },
        {  9633,  -2260, -11362,  -6436,   6436,  11363,   2260,  -9632 },
        {  8192,  -8192,  -8192,   8192,   8192,  -8191,  -8191,   8192 },
        {  6436, -11362,   2260,   9633,  -9632,  -2260,  11363,  -6436 },
        {  4433, -10703,  10703,  -4433,  -4433,  10703, -10703,   4433 },
        {  2260,  -6436,   9633, -11362,  11363,  -9632,   6436,  -2260 }
    };

    for (unsigned int row = 0; row < DCT_SIZE; ++row) {
        int acc = 0;  // accumulate in int, as the products exceed 16 bits
        for (unsigned int col = 0; col < DCT_SIZE; ++col) {
            const int weight = (int)kCoeff[row][col];
            acc += src[col] * weight;
        }
        dst[row] = DESCALE(acc, CONST_BITS);
    }
}
------------------------------------------
SystemVerilog Model
------------------------------------------
//
// Project: N-Point DCT
// File : dct_defs.sv
// Author : Kook, goodkook@gmail.com
//
// Shared parameters for the N-point DCT design.
package dct_defs;
  parameter WIDTH_X = 16;    // bit width of input sample X
  parameter WIDTH_Y = 16;    // bit width of output sample Y
  parameter WIDTH_W = 16;    // bit width of one coefficient W
  parameter POINT_N = 8;     // number of points (and of processing elements)
  parameter CONST_BITS = 13; // right-shift applied when de-scaling the accumulator

  // Constant-expression replacement for $clog2 (kept as a macro; the
  // $clog2 form is left commented out below).  Returns the number of
  // address bits for up to 1024 entries, minimum 1; larger arguments
  // yield -1.  The argument and the whole body are parenthesized so the
  // macro expands correctly inside larger expressions.
`define CLOG2(x) \
  ( ((x) <= 2)    ? 1  : \
    ((x) <= 4)    ? 2  : \
    ((x) <= 8)    ? 3  : \
    ((x) <= 16)   ? 4  : \
    ((x) <= 32)   ? 5  : \
    ((x) <= 64)   ? 6  : \
    ((x) <= 128)  ? 7  : \
    ((x) <= 256)  ? 8  : \
    ((x) <= 512)  ? 9  : \
    ((x) <= 1024) ? 10 : \
    -1 )

  // Address width of one coefficient ROM row.
  parameter DEPTH_W = `CLOG2(POINT_N); //$clog2(POINT_N);
endpackage
---------------------------------------------------
//
// Project: N-Point DCT
// File : dct_W_ROM.sv
// Author : Kook, goodkook@gmail.com
//
////////////////////////////////////////////////////////////////////////////////
//
// Coeff. Constant store in 2-D ROM
// --------------------------------
// +---------+---------+- -+------------+
// ROM_W[ 0]-->|W[ 0][0]|W[ 0][1]| .... |W[ 0][N-1]|
// +---------+---------+- -+------------+
// ROM_W[ 1]-->|W[ 1][0]|W[ 1][1]| .... |W[ 1][N-1]|
// +---------+---------+- -+------------+
// | .....ROM data from external file..... |
// +---------+---------+- -+------------+
// ROM_W[N-1]-->|W[N-1][0]|W[N-1][1]|........|[W[N-1][N-1]|
// +---------+---------+- -+------------+
//
// ROM(Coeff. Table) Address generator
// -----------------------------------
// +--------------+
// +-[X-0]----->|Addr.W[0][x] -->init_ACC[0]
// +--------------+ | +--------------+
// |N-bits counter| +-[X-1]----->|Addr.W[1][x] -->init_ACC[1]
// Req_X<-- for --[X]->| +--------------+
// | Wnx ROM Addr | | ........
// +--------------+ | +--------------+
// +-[X-(N-1)]->|Addr.W[N-1][x]->init_ACC[N-1]
// +--------------+
//
// * "init_ACC", when Address for W[N][x]=0
// * "Req_X", when counter X=0
//
// ROM Data out sequence, when N=4
// -------------------------------
// W[0][x] W[1][x] W[2][x] W[3][x]
// +-------+ +-------+ +-------+ +-------+
// Start->|W[0][0]| |W[1][0]| |W[2][0]| |W[3][0]|
// +-------+ +-------+ +-------+ +-------+
// |W[0][1]| |W[1][1]| |W[2][1]| Start->|W[3][1]|
// +-------+ +-------+ +-------+ +-------+
// |W[0][2]| |W[1][2]| Start->|W[2][2]| |W[3][2]|
// +-------+ +-------+ +-------+ +-------+
// |W[0][3]| Start->|W[1][3]| |W[2][3]| |W[3][3]|
// +-------+ +-------+ +-------+ +-------+
//
// t0: W[0][0] ... ... ...
// t1: W[0][1] W[1][0] ... ...
// t2: W[0][2] W[1][1] W[2][0] ...
// t3: W[0][3] W[1][2] W[2][1] W[3][0]
// t4: ... W[1][3] W[2][2] W[3][1]
// t5: ... ... W[2][3] W[3][2]
// t6: ... ... ... W[3][3]
//
////////////////////////////////////////////////////////////////////////////////
import dct_defs::*;
// Coefficient ROM and address generator.
//
// A free-running DEPTH_W-bit counter addresses the coefficient table.
// Row i is read at (counter - i), so each processing element receives its
// coefficients one clock later than the previous one.
//
//   clk, rst    : clock and asynchronous active-high reset
//   req         : high while the counter is all ones
//   init_ACC[i] : high while the read address of row i is 0
//   W[i]        : coefficient currently read from row i
module dct_W_ROM(
  input clk,
  input rst,
  output logic req,
  output logic init_ACC[POINT_N],
  output logic [WIDTH_W-1:0] W[POINT_N]);

  // ROMs and read contents from external file
  logic [WIDTH_W-1:0] ROM_W[POINT_N][POINT_N] = {
`include "dct_ROM_Table.txt"};

  // Address generator
  logic unsigned [DEPTH_W-1:0] addr_W;

  // Nonblocking assignments: addr_W feeds combinational logic that other
  // clocked processes sample on the same edge, so the update must not be
  // visible until all processes have sampled the old value.
  always_ff @(posedge clk, posedge rst)
  begin : Gen_W_ROM_Addr
    if (rst==1)
      addr_W <= '0;   // Coeff. ROM data out, wait for 1-clock after reset
    else
      addr_W <= addr_W + 1'b1;   // wraps around at 2**DEPTH_W
  end

  // ROM Data out
  genvar i;
  wire [DEPTH_W-1:0] addr_Wn[POINT_N];
  for (i=0; i<POINT_N; i++)
  begin
    // ROM Address delayed for Pipelined operation
    // (the subtraction is truncated to DEPTH_W bits, i.e. modulo 2**DEPTH_W)
    assign addr_Wn[i] = addr_W - i;
    assign W[i] = ROM_W[i][addr_Wn[i]];
    //assign W[i] = ROM_W[i][addr_W-i];
    // Control signal for initializing PE's accumulator
    assign init_ACC[i] = (addr_Wn[i]==0)? 1:0;
  end

  //assign req = (addr_W==0)? 1:0;
  assign req = (addr_W=='1)? 1:0;
endmodule
----------------------------------------------------------
//
// Project: N-Point DCT
// File : dct_ROM_Table.txt
// Author : Kook, goodkook@gmail.com
//
// 2-Dimensional Array Contents
// for 8-Point DCT
//
// One brace group per ROM row; the group order matches the first index of
// ROM_W in dct_W_ROM.sv, which includes this file inside its initializer.
// NOTE(review): row 4 holds two -8191 entries while the rest of that row is
// +/-8192; the C model table in dct.cpp uses the same values -- confirm
// whether this asymmetry is intended before changing either table.
{ 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192},
{11363, 9633, 6436, 2260, -2260, -6436, -9632,-11362},
{10703, 4433, -4433,-10703,-10703, -4433, 4433, 10703},
{ 9633, -2260,-11362, -6436, 6436, 11363, 2260, -9632},
{ 8192, -8192, -8192, 8192, 8192, -8191, -8191, 8192},
{ 6436,-11362, 2260, 9633, -9632, -2260, 11363, -6436},
{ 4433,-10703, 10703, -4433, -4433, 10703,-10703, 4433},
{ 2260, -6436, 9633,-11362, 11363, -9632, 6436, -2260}
---------------------------------------------------------
//
// Project: N-Point DCT
// File : dct_PE.sv
// Author : Kook, goodkook@gmail.com
//
///////////////////////////////////////////////////////////////////////
//
// Processing Element for Pipelined 1-D DCT
// ----------------------------------------
// - Parameterized design in
// WIDTH_X, WIDTH_W, WIDTH_Y, CONST_BITS
//
// [WIDTH_X] +-----+
// in_X------\----> -----------+---------------------------->out_X
// | | |
// clk--> | |
// +-----+ |
// \
// [WIDTH_W] +-----+ |
// in_W------\----> -------+ |
// | | | |
// clk--> | | |
// +-----+ +--V---V--+
// \ MULTPLY /
// +---|---+
// |
// +-----\-----+ \[WIDTH_X+WIDTH_W]
// | | |
// | +-V---V-+
// | \ ADD /
// | +--|--+
// +---|---+ |
// | MUX | |
// init_ACC--> 1 0 | \[WIDTH_X+WIDTH_W+1]
// +-|---|-+ |
// | | |
// '0 | +-----+ |
// +<-- <--+
// | | ACC |
// | | <--clk
// | +-----+
// |
// | +-------+ [WIDTH_Y]
// +--|DESCALE|-----\-----------> out_Y(Yn)
// +-------+
// [CONST_BITS]
//
///////////////////////////////////////////////////////////////////////
import dct_defs::*;
// Rounding right shift: adds 1 << (n-1) and shifts right by n.
// ">>>" keeps the sign when the operand is signed (">>" is a logical shift
// in SystemVerilog), and both arguments are parenthesized.
`define DESCALE(x,n) (((x) + (1 << ((n)-1))) >>> (n))

// One processing element of the pipelined 1-D DCT.
//
//   clk, rst : clock and asynchronous active-high reset
//   in_X     : input sample; registered and forwarded on out_X
//   in_W     : coefficient for this clock; registered before use
//   init_ACC : registered; when the registered flag is set the accumulator
//              is loaded with the current product instead of adding to it
//   out_X    : in_X delayed by one clock (feeds the next PE)
//   valid_Y  : the registered init_ACC flag
//   out_Y    : accumulator after rounding and shifting by CONST_BITS
//
// All registers use nonblocking assignments so that cascaded PEs, which
// sample each other's out_X on the same clock edge, always see the value
// from before the edge regardless of simulator process ordering.
module dct_PE(
  input clk,
  input rst,
  input [WIDTH_X-1:0] in_X,
  input [WIDTH_W-1:0] in_W,
  input init_ACC,
  output logic [WIDTH_X-1:0] out_X,
  output logic valid_Y,
  output [WIDTH_Y-1:0] out_Y );

  // Register for X to be shifted
  always_ff @(posedge clk, posedge rst)
  begin : Reg_X
    if (rst==1)
      out_X <= '0;
    else
      out_X <= in_X;
  end

  // Latch W & init_ACC input (Delay, because of in_X shift)
  logic signed [WIDTH_W-1:0] latch_W;
  logic _init_ACC;
  always_ff @(posedge clk, posedge rst)
  begin : Reg_W
    if (rst==1) begin
      latch_W   <= '0;
      _init_ACC <= 1'b0;
    end
    else begin
      latch_W   <= in_W;
      _init_ACC <= init_ACC;
    end
  end

  // Multiplier (both operands interpreted as signed)
  logic signed [(WIDTH_W+WIDTH_X)-1:0] mult_W_X;
  assign mult_W_X = signed'(latch_W) * signed'(out_X);

  // Accumulator
  logic signed [(WIDTH_W+WIDTH_X)-1:0] Acc;
  always_ff @(posedge clk, posedge rst)
  begin : Accmulator
    if (rst==1)
      Acc <= '0;
    else
    begin
      if (_init_ACC==1)
        Acc <= mult_W_X;                        // start of a new sum
      else
        Acc <= signed'(Acc) + signed'(mult_W_X);
    end // if-else
  end // always_ff

  // Output Y
  assign out_Y = `DESCALE(Acc, CONST_BITS); // De-Scale!!!
  assign valid_Y = _init_ACC;
  /*
  // Alternative: valid_Y registered directly from init_ACC
  always_ff @(posedge clk, posedge rst)
  begin
    if (rst==1)
      valid_Y <= 0;
    else
      valid_Y <= init_ACC;
  end
  */
endmodule
----------------------------------------------------
//
// Project: N-Point DCT
// File : dct_Out.sv
// Author : Kook, goodkook@gmail.com
//
//
///////////////////////////////////////////////////////////////////////////////
//
// Serialized out_Y
// ----------------
//
// from multiple PE (it was pipelined parallelism)
// +-----\
// | \
// out_Y[0]--\--> S \
// | E |
// out_Y[1]--\--> L |
// | E | +------+
// out_Y[2]--\--> C | | |
// | T ----\--> ---\--> out_Y
// | | | |
// ..... | | |
// | Y | clk-> |
// | | | |
// out_Y[N-1]-\--> / +------+
// | /
// +--|--/
// |
// valid_Y[0]-------+
// valid_Y[1]-------+
// valid_Y[2]-------+
// .... |
// valid_Y[N-1]-----+
//
// +-+ +-+ +-+ +-+ +-+ +-+ +-+ +-+ +-+ +-+ +-+
// | | | | | | | | | | | | | | | | | | | | | |
// clk --+ +-+ +-+ +-+ +-+ +-..... -+ +-+ +-+ +-+ +-+ +-+ +-
//
// +---+ +---+
// | | | |
// valid_Y[0]--+ +-----------.....-------------------+ +--------
//
// --+---+----------- -------------------+---+--------
// out_Y[0]..| |... ..... ...| |...
// --+---+----------- -------------------+---+--------
//
// +---+ +---+
// | | | |
// valid_Y[1]------+ +-------.....-----------------------+ +---
//
// --+---+------- -----------------------+---+---
// out_Y[1] ...| |.... ..... ...| |...
// --+---+------- -----------------------+---+--
//
// ..............
// +---+
// | |
// valid_Y[N-1]------------------.....---------------+ +---
//
// ------------------ ---------------+---+-------------
// out_Y[N-1] ..... ...| |....
// ------------------ ---------------+---+-------------
//
///////////////////////////////////////////////////////////////////////////////
import dct_defs::*;
// Serializes the per-PE results onto a single output stream.
//
//   clk, rst   : clock and asynchronous active-high reset
//   valid_Y[i] : marks in_Y[i] as the value to capture on this clock
//   in_Y[i]    : result of processing element i
//   out_Y      : registered copy of the selected in_Y; holds its value when
//                no valid_Y bit is set; if several are set, the highest
//                index wins (last assignment in the loop)
//   rdy_Y      : valid_Y[0] delayed by 2*POINT_N + 1 clocks
//
// Each always_ff uses its own loop variable and nonblocking assignments
// only: a variable written in an always_ff must not be written by any
// other process, and blocking/nonblocking assignments must not be mixed
// on the same register.
module dct_Out(
  input clk,
  input rst,
  input valid_Y[POINT_N],
  input [WIDTH_Y-1:0] in_Y[POINT_N],
  output logic [WIDTH_Y-1:0] out_Y,
  output logic rdy_Y);

  logic _valid_Y[POINT_N*2];

  // Serialize for output Y
  always_ff @(posedge clk, posedge rst)
  begin
    if (rst==1)
      out_Y <= '0;
    else
      for (int i=0; i<POINT_N; i++)
        if (valid_Y[i]==1)
          out_Y <= in_Y[i];
  end

  // Ready for the first Y
  always_ff @(posedge clk, posedge rst)
  begin
    if (rst==1)
    begin
      for (int k=0; k<(POINT_N*2); k++)
        _valid_Y[k] <= 1'b0;
      rdy_Y <= 1'b0;
    end
    else
    begin
      _valid_Y[0] <= valid_Y[0];
      for (int k=1; k<(POINT_N*2); k++)
        _valid_Y[k] <= _valid_Y[k-1]; // Shift!
      rdy_Y <= _valid_Y[(POINT_N*2)-1];
    end
  end
endmodule
-------------------------------------------------------
//
// Project: N-Point DCT
// File : dct_top.sv
// Author : Kook, goodkook@gmail.com
//
//////////////////////////////////////////////////////////////////////////////
// N-Point DCT
// -----------
// - Commonly used kernel in DSP: 2D_Matrix(Coeff.) * Vector
// - Applicable to DCT
// - Pipelined Parallelism
// Fully pipelined (0-clock delay)
// Latency: N+1 clocks from first input
// - Parameterized design
// Pipeline depth (number of points, i.e. the vector length)
// Width of input X, coeff-W and output Y
//
// +---------------------------------------+
// | Parametized PEs |
// | PE0 PE1 PEn |
// | +---+ +---+ +---+ |
// | | | | | | | |
// in_X --->----> ------> -->......--> --------->out_X
// | | | | | | | |
// |W0x-> | W1x-> | Wnx-> | |
// | +-|-+ +-|-+ +-|-+ |
// | | | | |
// | +->Y0 +->Y1 +->Yn |
// | |
// | +-----------+ +-----------+ |
// | W<-- Generate W| Y0-> ----->out_Y
// | | 2-D Array | Y1-> Serialize | |
// req_X<---------- ROM | ..| Yn -----> rdy_Y
// | +-----------+ Yn-> | |
// rst---> +-----------+ |
// | |
// clk---> |
// +---------------------------------------+
//
// +-+ +-+ +-+ +-+ +-+ + +-+ +-+ +-+ +-+ +-+ +-+ +-+
// | | | | | | | | | | | | | | | | | | | | | | | | |
// clk --+ +-+ +-+ +-+ +-+ +-.....-+-+ +-+ +-+ +-+ +-+ +-+ +-+ +-
// 0 1 2 3 N-1 N N+1
// : : : : : : : :
// +---+ : +---+ : :
// | | : : | | : :
// req_X--+ +-----------------------+ +---------
// : : : : : : : :
// +--*+---+---+---+-- --+--*+---+---+---+--
// in_X---| X0| X1| X2| X3| ......... |X"0|X"1|X"2|X"3|
// +---+---+---+---+-- --+---+---+---+---+--
// : : : :
// +---+ +---+
// | | | |
// rdy_Y----------+ +------.........--------+ +---------------
// : : : :
// --+---+---+-- --+---+---+---+---+--
// out_Y--...Y'n-1|Y'0|Y'1| ......... Y'n-1| Y0| Y1| Y2| ......
// --+---+---+-- --+---+---+---+---+--
//
//////////////////////////////////////////////////////////////////////////////
import dct_defs::*;
// Top level of the N-point DCT: one coefficient ROM/address generator,
// POINT_N cascaded processing elements and one output serializer.
//
//   clk, rst : clock and reset, passed unchanged to every sub-module
//   in_X     : input sample stream, fed to the first PE
//   req_X    : "req" output of the coefficient ROM address generator
//   out_X    : X output of the last PE in the chain
//   out_Y    : serialized result stream from dct_Out
//   rdy_Y    : "rdy_Y" output of dct_Out
module dct_top(
input clk,
input rst,
input [WIDTH_X-1:0] in_X,
output req_X,
output [WIDTH_X-1:0] out_X,
output [WIDTH_Y-1:0] out_Y,
output rdy_Y );
// Instantiate DCT Coeff. ROM generator
// W[i] / init_ACC[i] are the coefficient and accumulator-init flag for PE i.
wire [WIDTH_W-1:0] W[POINT_N];
logic init_ACC[POINT_N];
dct_W_ROM U_dct_ROM( .clk(clk),
.rst(rst),
.req(req_X),
.init_ACC(init_ACC),
.W(W));
// Generate DCT processing elements
genvar i;
// temp_X[i]: X output of PE i (input of PE i+1)
// temp_Y[i], valid_Y[i]: result and valid flag of PE i
logic [WIDTH_X-1:0] temp_X[POINT_N];
logic [WIDTH_Y-1:0] temp_Y[POINT_N];
logic valid_Y[POINT_N];
// First PE
// Instantiated outside the loop because its X input is the module port
// in_X rather than the previous element's temp_X.
dct_PE U_dct_PE0( .clk(clk),
.rst(rst),
.in_X(in_X),
.in_W(W[0]),
.init_ACC(init_ACC[0]),
.out_X(temp_X[0]),
.valid_Y(valid_Y[0]),
.out_Y(temp_Y[0]));
// Pipelined cascade PEs
// PE i takes its X input from the X output of PE i-1.
for (i=1; i<POINT_N; i++)
begin
dct_PE U_dct_PEn( .clk(clk),
.rst(rst),
.in_X(temp_X[i-1]),
.in_W(W[i]),
.init_ACC(init_ACC[i]),
.out_X(temp_X[i]),
.valid_Y(valid_Y[i]),
.out_Y(temp_Y[i]));
end
// DCT OUT with De-Scaling
// Collects the POINT_N results into the single out_Y stream.
dct_Out U_dct_Out( .clk(clk),
.rst(rst),
.valid_Y(valid_Y),
.in_Y(temp_Y),
.out_Y(out_Y),
.rdy_Y(rdy_Y));
// Output X
assign out_X = temp_X[POINT_N-1];
endmodule
-------------------------------------------------------------
//
// Project: N-Point DCT
// File : dct_Testbench.sv
// Author : Kook, goodkook@gmail.com
//
import dct_defs::*;
// Testbench: connects the DUT (dct_top) to the stimulus generator
// (gen_stim) and the result checker (check_out), and generates the clock
// and reset.
module dct_Testbench();
logic clk, rst, req_X, rdy_Y, _Error_;
logic [WIDTH_X-1:0] in_X;
logic [WIDTH_X-1:0] out_X;
logic [WIDTH_Y-1:0] out_Y;
// Sample indices reported by the stimulus generator and the checker.
integer idx_X, idx_Y;
// Instantiate DUT
dct_top DUT_dct_top ( .clk(clk),
.rst(rst),
.in_X(in_X),
.req_X(req_X),
.out_X(out_X),
.out_Y(out_Y),
.rdy_Y(rdy_Y));
// Instantiate SC module: Stimulus generator
// Drives in_X; watches req_X from the DUT.
gen_stim U_gen_stim ( .clk(clk),
.req_X(req_X),
.out_X(in_X),
.idx_X(idx_X));
// Instance SC module: Check-Out
// Observes the DUT's input and output streams and drives _Error_.
check_out u_check_out ( .clk(clk),
.req_X(req_X),
.in_X(in_X),
.rdy_Y(rdy_Y),
.in_Y(out_Y),
.idx_Y(idx_Y),
._Error_(_Error_));
// Clock Generator
// Starts low and toggles every 10 time units (period 20); the forever
// loop never exits, so the always body runs only once.
always
begin
clk = 0;
forever #10 clk = ~clk;
end
// Reset
// Active-high pulse from time 5 to time 20.
initial begin
#0 rst = 0;
#5 rst = 1;
#15 rst = 0;
end
/* **************************************
// Input X
initial begin
in_X =0;
@(posedge req_X);
@(posedge req_X);
in_X = 2048;
@(posedge req_X);
in_X = 1024;
@(posedge req_X);
in_X = 2048;
@(posedge req_X);
in_X = 0;
@(posedge req_X);
forever @(posedge clk)
in_X = $random;
end
****************************************** */
endmodule
---------------------------------------------
//
// Project: N-Point DCT
// File : gen_stim.h
// Author : Kook, goodkook@gmail.com
//
#ifndef INCLUDED_GEN_STIM
#define INCLUDED_GEN_STIM
#include <systemc.h>
#include "../c/dct.h"
// Stimulus generator: drives the DCT input stream.
//
// stimulus() runs on every rising edge of clk (not at time zero) and
// writes the next sample to out_X and the current sample index to idx_X.
SC_MODULE(gen_stim)
{
public:
    // Ports
    sc_in<sc_logic> clk;        // clock
    sc_in<sc_logic> req_X;      // frame-start request, read by stimulus()
    sc_out<sc_uint<DW> > out_X; // generated input sample
    sc_out<int> idx_X;          // index of the current sample
    // Internal Signals
    sc_signal<sc_uint<DW> > nTest;
    // Methods
    void stimulus();

    SC_CTOR(gen_stim)
    : clk("clk"),
      req_X("req_X"),
      out_X("out_X"),
      idx_X("idx_X")
    {
        SC_METHOD(stimulus);
        // Rising-edge sensitivity through the port's event finder;
        // replaces the deprecated "sensitive_pos << clk" form.
        sensitive << clk.pos();
        dont_initialize();

        // Seed the C random number generator from the calendar time so
        // that the random test set differs between runs.
        const unsigned seed = static_cast<unsigned>(time(NULL)) / 2;
        srand(seed);
    }

    // Destructor does nothing
    ~gen_stim()
    {
    }
};
#endif
-------------------------------------------
//
// Project: N-Point DCT
// File : gen_stim_stimulus.cpp
// Author : Kook, goodkook@gmail.com
//
#include "gen_stim.h"
#include <time.h>
#include <math.h>
#define my_cos(a,p,n) \
(a * cos((float)p*M_PI*(float)n/(float)DCT_SIZE))
#define my_sin(a,p,n) \
(a * sin((float)p*M_PI*(float)n/(float)DCT_SIZE))
void gen_stim::stimulus()
{
static int nTestSet = 0;
static int nCount;
// Check for req_X as start of new input frame
if (req_X.read()=='1')
{
nCount = 0;
// Constant input
if (nTestSet==0) out_X.write(0);
else if (nTestSet==1) out_X.write(1024);
else if (nTestSet==2) out_X.write(2048);
// cos() input
else if (nTestSet==3)
out_X.write((sc_uint<16>)my_cos(2048, 1.0, nCount));
else if (nTestSet==4)
out_X.write((sc_uint<16>)my_sin(2048, 1.0, nCount));
else if (nTestSet==5)
out_X.write((sc_uint<16>)rand());
}
// Stimulus generation during in-frame,
else
{
nCount++;
if (nTestSet==3) // cos()
out_X.write((sc_uint<16>)my_cos(2048, 1.0, nCount));
else if (nTestSet==4) // sin()
out_X.write((sc_uint<16>)my_cos(2048, 1.0, nCount));
else if (nTestSet==5) // rand()
out_X.write((sc_uint<16>)rand());
if (nCount==(DCT_SIZE-1))
if (nTestSet<5) nTestSet++;
}
// Signal
nTest.write((sc_uint<16>)nCount);
idx_X.write(nCount);
}
--------------------------------------------------------
//
// Project: N-Point DCT
// File : check_out.h
// Author : Kook, goodkook@gmail.com
//
#ifndef INCLUDED_CHECK_OUT
#define INCLUDED_CHECK_OUT
#include <systemc.h>
#include "../c/dct.h"
// Result checker: captures the DUT input stream, runs the C reference
// model on it and compares the outcome with the RTL output stream.
//
// Do_C_Model() runs on both clock edges, Do_Compare() on the rising edge;
// neither runs at time zero.
SC_MODULE(check_out)
{
public:
    // Ports
    sc_in<sc_logic> clk;
    sc_in<sc_logic> req_X;      // frame start of the input stream
    sc_in<sc_uint<DW> > in_X;   // input sample as seen by the DUT
    sc_in<sc_logic> rdy_Y;      // frame start of the output stream
    sc_in<sc_uint<DW> > in_Y;   // output sample from the DUT
    sc_out<int> idx_Y;          // index of the output sample being shown
    sc_out<sc_logic> _Error_;   // driven by Do_Compare()
    // Internal Signals
    sc_signal<sc_uint<16> > nTest;
    // Internal variables for DCT Algorithm
    // (src/dst: frame being captured; _x and __x: successively older copies)
    dct_data_t src[DCT_SIZE], _src[DCT_SIZE], __src[DCT_SIZE]; // Inputs
    dct_data_t dst[DCT_SIZE], _dst[DCT_SIZE], __dst[DCT_SIZE]; // C-Result
    dct_data_t rtl[DCT_SIZE]; // RTL Result
    // Methods
    void Do_C_Model(); // Receive and C-Model
    void Do_Compare(); // Compare RTL vs. C Results

    SC_CTOR(check_out)
    : clk("clk"),
      req_X("req_X"),
      in_X("in_X"),
      rdy_Y("rdy_Y"),
      in_Y("in_Y"),
      idx_Y("idx_Y"),
      _Error_("_Error_")
    {
        SC_METHOD(Do_C_Model);
        sensitive << clk; // Sensitize clk on both edge
        dont_initialize();

        SC_METHOD(Do_Compare);
        // Rising-edge sensitivity through the port's event finder;
        // replaces the deprecated "sensitive_pos << clk" form.
        sensitive << clk.pos();
        dont_initialize();

        // Initialize local vars
        for (int i=0; i<DCT_SIZE; i++)
        {
            src[i] = _src[i] = __src[i] = 0;
            dst[i] = _dst[i] = __dst[i] = 0;
            rtl[i] = 0;
        }

        // The port is not bound yet while the constructor runs, so set its
        // start-up value with initialize() instead of writing through it.
        _Error_.initialize(sc_logic('0'));
    }

    // Destructor does nothing
    ~check_out()
    {
    }
};
#endif
------------------------------------------------------------
// Project: N-Point DCT
// File : check_out_model.cpp
// Author : Kook, goodkook@gmail.com
//
#include "check_out.h"
// METHOD:
// Receive Data and Do C-Model
void check_out::Do_C_Model()
{
static int nCount;
if (req_X.read()=='1')
{
if (clk.negedge())
{
// req_X:
// Do C-Model with previous frame data before saving new.
// Last in_X
src[nCount] = (dct_data_t)in_X.read();
// Call C-Model DCT
dct_1d(src, dst);
// DEBUG: Backup Inputs and C-Model results
for (int i=0; i<DCT_SIZE; i++)
{
_src[i] = src[i];
_dst[i] = dst[i];
}
}
else if (clk.posedge())
{
// Start new frame
nCount = 0;
}
}
else if (clk.negedge())
{
src[nCount] = (dct_data_t)in_X.read();
nCount++;
}
// Signal
//nTest.write((sc_uint<16>)nCount);
}
// METHOD:
// Compare RTL vs. C Result
// METHOD: collect the RTL output stream and compare it with the C result.
//
// Runs on the rising clock edge.  When rdy_Y is '1' the frame collected in
// rtl[] is printed and compared element by element with __dst[]; _Error_
// is set to '1' if any element differs and to '0' otherwise.  The
// _src/_dst copies are then moved to __src/__dst and collection restarts
// with the current sample as element 0.  On other clocks the current
// sample is stored in the next rtl[] slot until the frame is full.
void check_out::Do_Compare()
{
    static int nCount;
    dct_data_t temp_Y = (dct_data_t)in_Y.read();

    if (rdy_Y.read()=='1')
    {
        // rdy_Y:
        // As new result started,
        // Compare RTL-Model with previous frame result
        // DEBUG: Show inputs
        printf("\nDCT Inputs:");
        for (int i=0; i<DCT_SIZE; i++)
            printf("%10d", __src[i]);
        // DEBUG: Show C-Model results
        printf("\nDCT Result:");
        for (int i=0; i<DCT_SIZE; i++)
            printf("%10d", __dst[i]);
        // DEBUG: Show RTL result
        printf("\nRTL Result:");
        for (int i=0; i<DCT_SIZE; i++)
            printf("%10d", rtl[i]);
        printf("\n===============================================================\n");

        // Compare two results.  The flag is accumulated over the whole
        // frame and written once, so a mismatch in an early element is not
        // cleared by a later matching one.
        bool mismatch = false;
        for (int i=0; i<DCT_SIZE; i++)
        {
            if (rtl[i]!=__dst[i])
            {
                mismatch = true;
                // Report the pair that was actually compared.
                printf("ERROR at [%d]: Cout=%d RTLout=%d", i, __dst[i], rtl[i]);
                printf("\n===============================================================\n");
            }
        }
        _Error_ = sc_logic(mismatch ? '1' : '0');

        // DEBUG: Backup Inputs and C-Model results
        for (int i=0; i<DCT_SIZE; i++)
        {
            __src[i] = _src[i];
            __dst[i] = _dst[i];
        }
        nCount = 0;
        rtl[nCount] = temp_Y;
    }
    else
    {
        if (nCount<(DCT_SIZE-1))
        {
            nCount++;
            rtl[nCount] = temp_Y;
        }
    }
    // Showing out_Y index, 1-clock delayed
    //idx_Y.write(nCount);
    idx_Y.write( ((nCount+1)>=DCT_SIZE)? 0:nCount+1 );
    // Signal
    //nTest.write((sc_uint<16>)nCount);
}
댓글 없음:
댓글 쓰기