// Pipelined WIDTH x WIDTH multiplier.
//
// The product A * B is computed combinationally (only the low WIDTH bits are
// kept, because `prod` is WIDTH bits wide) and is then delayed by STAGES
// registers. `enable` is delayed by the same number of registers to form
// `valid`, so `valid` lines up with the product of the operands it arrived
// with.
//
// Parameters:
//   WIDTH  - bit width of both operands and of the result.
//   STAGES - number of register stages after the multiply
//            (0 gives a purely combinational path).
//
// Ports:
//   clock  - every register updates on the rising edge.
//   enable - qualifier for A/B; it only feeds the valid pipeline and does
//            not gate the data registers.
//   A, B   - operands.
//   valid  - `enable` delayed by STAGES clock edges.
//   O      - low WIDTH bits of A * B, delayed by STAGES clock edges.
//
// There is no reset: until STAGES rising edges have occurred the outputs
// show whatever the registers started with (X in simulation).
module mult #(
    parameter WIDTH  = 32,
    parameter STAGES = 4
) (
    input              clock,
    input              enable,
    input  [WIDTH-1:0] A,
    input  [WIDTH-1:0] B,
    output             valid,
    output [WIDTH-1:0] O
);

    // The multiply itself is written as one combinational expression and all
    // pipeline registers are placed behind it. NOTE(review): the original
    // author's remark ("Retiming really is magical...") suggests the
    // synthesis tool is expected to retime these registers back into the
    // multiplier - confirm that retiming is enabled in the build flow.
    wire [WIDTH-1:0] prod = A * B;

    // Valid and data travel through identical-depth delay lines so they stay
    // aligned at the outputs.
    shift_reg #(.WIDTH(1), .DEPTH(STAGES)) shr_valid (
        .clock(clock),
        .in   (enable),
        .out  (valid)
    );

    shift_reg #(.WIDTH(WIDTH), .DEPTH(STAGES)) shr_mul (
        .clock(clock),
        .in   (prod),
        .out  (O)
    );

endmodule

// Fixed-latency delay line: `out` is `in` delayed by DEPTH rising clock edges.
//
// Parameters:
//   WIDTH - bit width of the delayed value.
//   DEPTH - number of register stages; 0 makes the module a plain wire.
//
// Ports:
//   clock - registers update on the rising edge.
//   in    - value to delay.
//   out   - `in` as it was DEPTH clock edges ago.
//
// The registers have no reset or initial value, so `out` is undefined (X in
// simulation) until DEPTH rising edges have occurred.
module shift_reg #(
    parameter WIDTH = 1,
    parameter DEPTH = 4
) (
    input              clock,
    input  [WIDTH-1:0] in,
    output [WIDTH-1:0] out
);

    generate
        if (DEPTH == 0) begin : g_passthrough
            // A zero-deep array would be declared as state[-1:0] and `out`
            // would read an element that is never written, so the zero-stage
            // case is implemented as a direct connection instead.
            assign out = in;
        end else begin : g_pipeline
            // state[0] is the newest sample, state[DEPTH-1] the oldest.
            reg [WIDTH-1:0] state [DEPTH-1:0];
            integer i;

            assign out = state[DEPTH-1];

            always @(posedge clock) begin
                state[0] <= in;
                // Non-blocking assignments: every stage takes the value its
                // predecessor held before this edge. The loop body does not
                // run when DEPTH == 1.
                for (i = 1; i < DEPTH; i = i + 1) begin
                    state[i] <= state[i-1];
                end
            end
        end
    endgenerate

endmodule