// 
//			tuner2x.v - DDC/DUC and 3-stage Quadrature Low-Pass Filter
//				with CIC and FIR LPFs clocked at 2x sample rate
//
//					(C) Copyright 2006-2008 John B. Stephensen
//
// This Verilog source file and all its derivatives are licensed only for
// personal non-profit educational use in the Amateur Radio Service and
// the license is not transferrable. The information is provided as-is for
// experimental purposes and the author does not warranty its freedom
// from defects or its suitability for any specific application.
//
//	This module contains a direct digital synthesizer (DDS), a quadrature mixer and three
// 2-channel low-pass filters (LPFs). It is used to convert ADC data at the DDS output
// frequency to baseband or translate baseband data to the DDS frequency for the DAC.
//
// The ADC and DAC ports both use 16-bit signed fractional numbers and are synchronized
// with SCLK. The baseband I/O is synchronized to MCLK and uses complex numbers with
// parallel X and Y axis samples. The input consumes 16-bit X and Y samples and the output
// provides 20-bit X and Y samples. Internal operations are synchronized to DCLK. X-axis
// samples are present when SCLK is 1 and Y-axis samples are present when SCLK is 0. In
// receive mode, ADC is a real data input centered on frequency FRQ that is downconverted
// to baseband centered around DC. DOX and DOY are the baseband outputs with OV signalling
// that the data is valid. In transmit mode, DIX and DIY are the baseband inputs with IV
// indicating valid data. DAC is a real data output centered on frequency FRQ. 
//
// The 3 stages of filtering are cascaded with the CIC filter first on receive and last on
// transmit. The CIC filter has 4 stages and can decimate or interpolate by 10-256. The two
// FIR filters have variable coefficients with up to 511 taps and can each interpolate or
// decimate by 1-64. The usable decimation and interpolation range for the 3 cascaded
// filters is 40-160,000 allowing 500-2,000,000 baseband sample rates with an 80 MSPS ADC
// and DAC. A noise blanker is located between the two FIR filters. Receive mode is selected
// when XMT is 0 and transmit mode is selected when XMT is 1.
//
// Filter coefficients and other configuration data are loaded via the 16-bit port DIN
// when IOCS and IOWR are both active. IOADDR specifies one of 8 registers:
//
//		 15  14  13  12  11  10   9   8   7   6   5   4   3   2   1   0
//		+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
//	 0 |                         LSW Frequency                         |
//		+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
//	 1 |                         MSW Frequency                         |
//		+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
//	 2 |                Integer                |       Fraction        | CIC Gain
//		+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
//	 3 |   Exponent    |       |     CIC Interpolation/Decimation      |
//		+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
//  4 |   |       Blanker Threshold       |                           |
//		+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
//	 5 |       |   FIR 2 Decimation    |       |   FIR 1 Decimation    | (reset)
//		+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
//	 6 |                           |    FIR Filter 1/4 Instruction     | Coefficient Load
//	   +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
//	 7 |                                                       |RST|FIR| FIR Select
//	   +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
//
//		CIC Gain = integer.fraction x 2^exponent
//		RST - reset FIR filters
//		FIR - select filter to load
//
// The CIC decimation/interpolation factor may be set between 10-720 (load 9-719). Transmit
// gain is the interpolation factor to the fourth power and receive gain is the decimation
// factor to the fourth power. When receiving, 38 extra bits are provided to allow for a CIC
// gain up to 274 x 10^9 while retaining 18-bit resolution. When transmitting, 28 extra bits
// are provided to allow for a CIC gain up to 268 x 10^6. Unity overall gain is achieved by
// multiplying the input by a factor set by 3 fields and is an integer between 0 and 1023
// plus a fraction between 0 and 63/64 times a binary exponent between 2^0 and 2^15. On
// transmit the value of the integer field is limited to 64 to reserve 4 bits in the comb
// filter for bit growth.
//
//	Blanker Threshold sets the signal X and Y magnitude above which samples are zeroed.
//
// FIR 1 and FIR 2 decimation factors may be set to 1-64 by loading the values 0-63. The FIR
// interpolation factors are determined by the number of writes in the instructions loaded
// into each filter. They are loaded sequentially in 9-bit quarter-instructions and 512 must
// be loaded. Unused locations may be written with 0 (NOP). Writing register 5 resets both
// the FIR 1 and FIR 2 instruction counters.
//
// There is one status port with 5 bits indicating ADC, mixer, DAC adder, FIR filter 1
// and/or FIR filter 2 overflow. These are all reset when read.
//
//		 15  14  13  12  11  10   9   8   7   6   5   4   3   2   1   0
//		+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
//    |                                           |ADC|MIX|DAC|FL1|FL2|
//		+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
//
// CIC4H56: 1205 slices (25% XC3S500E), 7 multipliers and 5 block RAMs are used. Maximum
// clock speeds are SCLK: 115 MHz, DCLK: 194 MHz and MCLK: 234 MHz. (1214 slices w/doppler)
// CIC4H48: 1137 slices (23% XC3S500E), 7 multipliers and 5 block RAMs are used. Maximum
// clock speeds are SCLK: 115 MHz, DCLK: 194 MHz and MCLK: 234 MHz. Speed limited by DDS.
//
// Normal Warnings:
//		Signals <adj7<35>> and <adj7<16:0>> are assigned but never used.
//		Signals ppl<35>, ppl<2:0> and ppu<7:0> are assigned but never used.
//		Signals <irh<35:34>> is assigned but never used.
//		Signal <mixo<16:0>> is assigned but never used.
//		Signal <add<0>> is assigned but never used.
//		Signal <prod<35:34>> and <prod<5:0>> are assigned but never used.
//		Signal <size<3:0>> and <size1<7:0>> are assigned but never used.
//		Signal <cin<15>> and <cin<6:0>> are assigned but never used.
// 
// History:
//		4-28-09	inverted DAC MSB in rounder to equalize propagation delays
//		1-13-10	change to 56-bit CIC filter
//		1-22-10	change to cos/-sin DDS and adder at quadrature mixer transmit output
//		4-26-10	20-bit output on last FIR filter (sfir18x24x20x2) 197->190 MHz DCLK
//		4-27-10	add 1 in X and Y registers to offset truncation error in mixer and CIC
//					DCLK 190->194 MHz (+17 slices)
//
// tuner2x: top-level tuner module. Instantiates the DDS, the quadrature mixer
// multiplier, the CIC filter, two FIR filters with a noise blanker between them and
// the transmit FIFO, and holds the CPU-written configuration registers and the
// overflow status flags that are read back on DOUT.
module tuner2x( 
				 iocs, ioaddr, din, iowr, dout, iord, mclk, mrst,
				 adc, ovf, dac, sclk, dclk,
				 rfd, dix, diy, iv, dox, doy, ov,
				 xmt, full, rdy1, rdy2
				);
	 input iocs;				// select this module
//	 input iocs1;				// select doppler simulator
	 input [2:0] ioaddr;		// select configuration register
	 input [15:0] din;		// configuration data
	 input iowr;				// write program data
	 output [15:0] dout;		// status (overflow flags in bits 4:0, upper bits zero)
	 input iord;				// read status (also clears the overflow flags)
	 input mclk,mrst;			// master clock and reset (64 MHz)
	 input [15:0] adc;		// signed input at SCLK rate
	 input ovf;					// ADC overflow
	 output [15:0] dac;		// output at SCLK rate, taken from add[16:1]
									// NOTE(review): previously described as "offset binary",
									// but no MSB inversion is applied in this module - confirm
	 input sclk;				// sample clock (80 MHz)
	 input dclk;				// double rate clock (160 MHz)
	 output rfd;				// ready for baseband data (synchronized to MCLK)
    input [15:0] dix,diy;	// complex data input; fed to the FIR input mux unregistered
									// NOTE(review): file header says baseband I/O is on MCLK,
									// only IV is resynchronized to SCLK here - confirm
    input iv;					// input valid (resynchronized from MCLK to SCLK)
    output [19:0] dox,doy;	// complex data output, registered on SCLK
    output ov;					// valid output (synchronized to MCLK)
	 input xmt;					// 0=receive, 1=transmit
	 output full;				// TEST - FIFO full flag
	 output rdy1,rdy2;		// TEST - FIR 2 rfd and FIFO rfd
// internal signals
reg [5:0] dec1,dec2;			// configuration registers (FIR 1 / FIR 2 decimation)
reg reset,firsel;				// filter/DDS reset and FIR coefficient load select
reg [15:0] frq0;				// frequency register and LSW delay
reg [31:0] frq1;				// full 32-bit frequency word passed to the DDS
//wire [31:0] frq2;				// frequency with doppler spread
wire [17:0] lpf0d;			// LPF 1 input
wire lpf0rdy,lpf0v;
wire [17:0] lpf1d;			// LPF 1 output
wire lpf1v,ovf1;
wire [17:0] lpf2d;			// LPF 2 input
wire lpf2rdy,lpf2v;
wire [19:0] lpf3d;			// LPF 2 output (20 bits)
wire lpf3v,ovf3;
wire ce;							// CIC request for a new transmit sample (drives FIFO oe)
wire [17:0] crdo;				// CIC output
wire crov;						// CIC output valid
wire [17:0] lo,mixi;			// multiplier (mixer) inputs
wire [35:0] mixo;				// multiplier output
reg [17:0] dly,add;			// adder to sum two mixer outputs for DAC
wire siv;						// IV synchronized to SCLK
reg [19:0] d,x,y;				// sync. FIR output to SCLK (20 bits)
reg v;							// lpf3v delayed by one SCLK cycle
wire [17:0] fx,fy;			// FIFO output
wire frdy,full;				// FIFO ready for more data and FIFO full
reg mixov,dacov;				// overflow bits
reg adcovf,mixovf,dacovf,fir1ovf,fir2ovf;	// status flags
// TEST
assign rdy1 = lpf2rdy;
assign rdy2 = frdy;
// The CPU output is synchronized to SCLK for loading configuration data
reg [15:0] cin;	// latched data
reg [2:0] caddr;	// latched address
reg cwe;				// synchronized write enable
//reg cwe1;			// synchronized write enable for doppler simulator
//wire [15:0] cin;	// latched data
//wire [2:0] caddr;	// latched address
//wire cwe,cwe1;		// synchronized write enable
wire cwr0,cwr1,cwr2,cwr3,cwr4,cwr5,cwr6,cwr7;	// register select
//assign cin = din;
//assign caddr = ioaddr;
//assign cwe = iocs & iowr;
//assign cwe1 = iocs1 & iowr;
// latch I/O address and data
// DIN and IOADDR are captured on every SCLK edge where IOCS is high; CWE is the
// registered value of (iocs & iowr), so it is high for one SCLK cycle after each
// SCLK edge at which both were sampled high.
// NOTE(review): iocs/iowr/din/ioaddr are sampled directly with SCLK (the async2h
// synchronizer below is commented out) - confirm the bus timing guarantees a clean,
// single-cycle write strobe in the SCLK domain.
always @ (posedge sclk)	// *** was mclk ***
begin
	if (iocs) cin <= din;
	if (iocs) caddr <= ioaddr;
//	if (iocs|iocs1) cin <= din;
//	if (iocs|iocs1) caddr <= ioaddr;
	cwe <= iocs & iowr;
//	cwe1 <= iocs1 & iowr;
end
// generate delayed write enable synchronized to SCLK
//async2h syniowr (
//	.a(iocs & iowr),
//	.aclk(mclk),
//	.arst(mrst),
//	.b(cwe),
//	.bclk(sclk),
//	.brst(1'b0)
//	);
// decode addresses
// one write strobe per register, qualified by the registered write enable
assign cwr0 = cwe & (caddr == 0);
assign cwr1 = cwe & (caddr == 1);
assign cwr2 = cwe & (caddr == 2);
assign cwr3 = cwe & (caddr == 3);
assign cwr4 = cwe & (caddr == 4);
assign cwr5 = cwe & (caddr == 5);
assign cwr6 = cwe & (caddr == 6);
assign cwr7 = cwe & (caddr == 7);
// configuration registers
// All registers are cleared by MRST (sampled synchronously on SCLK) except RESET,
// which MRST sets to 1 so the filters and DDS are held in reset until register 7 is
// written with bit 1 clear.
always @ (posedge sclk)
begin
	if (mrst) frq0 <= 0;
	else if (cwr0) frq0 <= cin;		// two 16-bit transfers to write 32-bit frequency
	if (mrst) frq1 <= 32'h00000000;
	else if (cwr1) frq1 <= {cin,frq0};	// MSW write loads all 32 bits, so write the LSW (register 0) first
	if (mrst) dec1 <= 0;
	else if (cwr5) dec1 <= cin[5:0];	// one write sets decimation for both FIR filters
	if (mrst) dec2 <= 0;
	else if (cwr5) dec2 <= cin[13:8];
	if (mrst) firsel <= 0;
	else if (cwr7) firsel <= cin[0];	// selects filter to load
	if (mrst) reset <= 1;
	else if (cwr7) reset <= cin[1];	// reset when powered up - clear when programmed
end
// Doppler simulator dithers LO frequency
//dopplersim dsim (
//	.fi(frq1),
//	.fo(frq2),
//	.cin(cin[2:0]),
//	.cwe(cwe1),
//	.clk(sclk),
//	.rst(mrst)
//	);
// DDS with 32-bit frequency input sets the ADC and DAC center
// frequencies. Resolution is SCLK/4G = 0.02 Hz at 80 Msps.
// Cosine/sine sequence starts after falling edge of SCLK and
// completes in one SCLK cycle.
dds28 dds (
	.doxy(lo),		// cosine and sine output
	.sclk(sclk),	// I/O sample rate
	.dclk(dclk),	// 2X SCLK
	.rst(reset),	// reset accumulator before loading new frequency
	.frq(frq1)		// frequency
);
// Mixer multiplies inputs by sine and cosine with 2 clock delay.
// Input may be the ADC (receive) or CIC filter (transmit).
// A, B and P registers enabled by default
MULT18X18SIO mixer (
	.A(xmt ? mixi : {adc,2'b00}),	// ADC sample is left-justified to 18 bits on receive
	.B(lo),
	.P(mixo),
	.CLK(dclk),
	.CEA(1'b1),		// clocks always enabled
	.CEB(1'b1),
	.CEP(1'b1),
	.RSTA(1'b0),	// registers never reset
	.RSTB(1'b0),
	.RSTP(1'b0)
	);
// Adder sums 18-bits of mixer X and Y outputs
// X output delayed to coincide with Y output
// dly captures the upper product bits on the falling edge of SCLK; add sums that
// value with the product present at the next rising edge. add is held at zero in
// receive mode, so DAC is zero when XMT is 0.
always @ (negedge sclk)
	dly <= mixo[35:18];
always @ (posedge sclk)
	if (~xmt) add <= 0;
	else add <= dly + mixo[35:18];	// cos/-sin DDS
// MSB (extra sign bit) and LSB are discarded on output
assign dac = add[16:1];
// detect overflow in multiplier or adder
// overflow is flagged when the two most significant bits of a result differ
// NOTE(review): mixov is updated on every DCLK edge but is accumulated into mixovf
// on SCLK only - confirm both halves of each sample pair are meant to be observed
always @ (posedge dclk) mixov <= mixo[35] ^ mixo[34];
always @ (negedge sclk) dacov <= add[17] ^ add[16];
// CIC Filter - Converts 80 Msps mixer I/O to and from 320-8000 ksps for FIR
// filters. Decimates by 10-250 on receive or interpolates by 10-250 on transmit.
// X component present when SCLK high and Y component present when SCLK low.
// I/O is serial X and Y components of sample with data valid synchronized to SCLK
// and data synchronized to DCLK
CIC4H56 cic (
	.rdi(mixo[34:17]),// continuous input from mixer (receive)
	.tdix(fx),			// intermittent input from FIR LPF via FIFO (transmit)
	.tdiy(fy),
	.tie(ce),			// request new sample from FIFO
	.tdo(mixi),			// continuous interpolated output (transmit)
	.rdo(crdo),			// decimated output (receive)
	.rov(crov),			// valid output
	.sclk(sclk),		// sample clock
	.dclk(dclk),		// processing clock
	.rst(reset),
	.xmt(xmt),
	.cin(cin),			// configuration info.
	.cwr({cwr3,cwr2})	// 2 registers
	);
// synchronize input strobe to SCLK
async2h syniv (
	.a(iv),
	.aclk(mclk),
	.arst(mrst),
	.b(siv),
	.bclk(sclk),
	.brst(reset)
	);
// Switch FIR filter input between
// CIC filter output on receive and module input on transmit
// multiplex transmitter input using same scheme as CIC filter
// (presumably S selects D0..D3 as a binary index with XMT as the upper bit, so DIX is
// chosen when SCLK is 1 and DIY when SCLK is 0 in transmit mode - verify against MUX4X18)
MUX4X18 lpfmux (
	.D0(crdo),			// CIC output is 18 bits
	.D1(crdo),
	.D2({diy,2'b00}),	// expand transmit input to 18 bits
	.D3({dix,2'b00}),
	.S({xmt,sclk}),	// multiplex X and Y for filter input
	.Y(lpf0d)
	);
// input valid comes from the synchronized IV on transmit and from the CIC on receive
assign lpf0v = xmt ? siv : crov;
// 2 cascaded FIR filters and noise blanker
// Both filters share the coefficient load port (cin[8:0] written via register 6);
// firsel steers the write strobe to FIR 1 (firsel=0) or FIR 2 (firsel=1).
// NOTE(review): the file header says writing register 5 resets the FIR instruction
// counters, but prst is driven by the register 7 write strobe here - confirm.
sfir512x18x24x2 fir1 (
	.rfd(lpf0rdy),
	.dixy(lpf0d),
	.iv(lpf0v),
	.oe(lpf2rdy|~xmt),	// always enabled on receive; gated by FIR 2 rfd on transmit
	.doxy(lpf1d),
	.ov(lpf1v),
	.ovf(ovf1),
	.sclk(sclk),
	.dclk(dclk),
	.mrst(reset),
	.dec(dec1),
	.pclk(sclk),
	.pdata(cin[8:0]),
	.pwr(cwr6 & ~firsel),
	.prst(cwr7)
	);
noiseblanker nb (
	.dixy(lpf1d),
	.iv(lpf1v),
	.doxy(lpf2d),
	.ov(lpf2v),
	.sclk(sclk),
	.dclk(dclk),
	.mrst(reset),
	.cin(cin),
	.cwe(cwr4)		// register 4 write strobe
	);
sfir512x18x24x20x2 fir2 (
	.rfd(lpf2rdy),
	.dixy(lpf2d),
	.iv(lpf2v),
	.oe(frdy|~xmt),	// always enabled on receive; gated by FIFO rfd on transmit
	.doxy(lpf3d),
	.ov(lpf3v),
	.ovf(ovf3),
	.sclk(sclk),
	.dclk(dclk),
	.mrst(reset),
	.dec(dec2),
	.pclk(sclk),
	.pdata(cin[8:0]),
	.pwr(cwr6 & firsel),
	.prst(cwr7)
	);
// synchronize X and Y output samples to SCLK and cancel DC offset
// d holds the value of lpf3d seen at the falling edge of SCLK; at the following
// rising edge, if lpf3v is high, d becomes X and the current lpf3d becomes Y.
// NOTE(review): history entry 4-27-10 says "add 1" but the constant added here is 5 -
// confirm the intended offset.
always @ (negedge sclk)
begin
	d <= lpf3d;	// delay first sample of pair
end
always @ (posedge sclk)
begin
	if (lpf3v) x <= d + 5;		// save first sample of pair
	if (lpf3v) y <= lpf3d + 5;	// save second sample of pair
	v <= lpf3v;						// delay output valid
end
// buffer in FIFO for retrieval by CIC (xmt)
fifo16x36s fifo (
	.pdi({y[19:2],x[19:2]}),// truncate samples to 18 bits (transmit only)
	.iv(v),						// load from filter Y output and delayed X sample
	.oe(xmt & ce),				// unload when decimation counter overflows
	.pdo({fy,fx}),
	.ov(),						// unused
	.empty(),					// unused
	.full(full),
	.rfd(frdy),					// false when zero or one entry left
	.clk(sclk),
	.rst(reset)
	);
// synchronize output strobes to MCLK
// rfd source is forced high in receive mode and follows FIR 1 rfd in transmit mode
async2h synrfd (
	.a(~xmt|lpf0rdy),
	.aclk(sclk),
	.arst(reset),
	.b(rfd),
	.bclk(mclk),
	.brst(mrst)
	);
async2h synov (
	.a(v),
	.aclk(sclk),
	.arst(reset),
	.b(ov),
	.bclk(mclk),
	.brst(mrst)
	);
// Collect status information
// Each flag is sticky: once its source is seen high on an SCLK edge it stays set until
// a status read (iord & iocs sampled on SCLK) or MRST clears it. The clear term is
// applied after the OR, so a read wins over an overflow arriving in the same cycle.
// The DAC adder overflow is only recorded while transmitting.
always @ (posedge sclk)
begin
	if (mrst) adcovf <= 0; else adcovf <= (adcovf|ovf) & ~(iord & iocs);
	if (mrst) mixovf <= 0; else mixovf <= (mixovf|mixov) & ~(iord & iocs);
	if (mrst) dacovf <= 0; else dacovf <= (dacovf|(dacov & xmt)) & ~(iord & iocs);
	if (mrst) fir1ovf <= 0; else fir1ovf <= (fir1ovf|ovf1) & ~(iord & iocs);
	if (mrst) fir2ovf <= 0; else fir2ovf <= (fir2ovf|ovf3) & ~(iord & iocs);
end
// connect module outputs
assign dox = x;
assign doy = y;
// status word: bit 4 = ADC, 3 = mixer, 2 = DAC adder, 1 = FIR 1, 0 = FIR 2 overflow
// (driven continuously; not gated by iord)
assign dout = {11'b00000000000,adcovf,mixovf,dacovf,fir1ovf,fir2ovf};
endmodule
