
// mem_router_reorder
//
// Fans a single host request port (m_arb_*) out to NUM_BANKS bank ports
// (b_arb_*) and merges the per-bank responses back onto the host response
// ports (m_rrp_* for read data, m_wrp_ack for write acknowledgements).
//
//  * Request path (generate scope "req"): the host request is forwarded to the
//    bank selected by the one-hot m_arb_bank_sel, which is produced by the
//    acl_iface_address_to_bankaddress instance "a2b" from m_arb_address.
//  * Read return path (scope "rrp"): a bank-select FIFO records the bank (and
//    burst count) of every accepted read, and one data FIFO per bank buffers
//    returned data, so read data is handed back in the order reads were issued.
//  * Write return path (scope "wrp"): write acks from all banks are
//    accumulated in a counter and sent to the host one per cycle.
//
// The three scopes reference each other hierarchically (req.stall, rrp.stall,
// wrp.stall), so the scope names are part of the logic and must not change.
module mem_router_reorder #(    
    parameter integer DATA_W = 256,                 // > 0
    parameter integer BURSTCOUNT_W = 1,             // > 0
    parameter integer ADDRESS_W = 32,               // > 0 (word address)
    parameter integer BYTEENA_W = DATA_W / 8,       // > 0

    parameter integer NUM_BANKS = 8,                // > 1
    parameter integer BANK_SEL_BIT = ADDRESS_W-$clog2(NUM_BANKS),   // ADDRESS_W-$clog2(NUM_BANKS) > x >= BURSTCOUNT_W

    // NOTE(review): with the default BURSTCOUNT_W = 1 this default evaluates to
    // 2*2-4 = 0, which conflicts with the "> 0" constraint; instantiations
    // presumably override it -- confirm.
    parameter integer BANK_MAX_PENDING_READS = 2*2**BURSTCOUNT_W-4,   // > 0
    parameter integer BANK_MAX_PENDING_WRITES = 2047,   // > 0

    // Both reset parameters are passed straight to acl_reset_handler below.
    parameter ASYNC_RESET = 1,
    parameter SYNCHRONIZE_RESET = 0
)
(
    input logic clock,
    input logic resetn,

    // Bank select (one-hot)
    // NOTE(review): this input is not referenced anywhere inside this module;
    // the bank select actually used is m_arb_bank_sel, derived from m_arb_address.
    input logic [NUM_BANKS-1:0] bank_select,

    // Host
    input logic m_arb_request,
    input logic m_arb_read,
    input logic m_arb_write,
    input logic [DATA_W-1:0] m_arb_writedata,
    input logic [BURSTCOUNT_W-1:0] m_arb_burstcount,
    input logic [ADDRESS_W-1:0] m_arb_address,
    input logic [BYTEENA_W-1:0] m_arb_byteenable,

    // OR of the request-path, read-return-path and write-return-path stalls.
    output logic m_arb_stall,

    // Asserted for one cycle per write ack forwarded to the host.
    output logic m_wrp_ack,

    // Read data returned to the host (both registered; see scope "rrp").
    output logic m_rrp_datavalid,
    output logic [DATA_W-1:0] m_rrp_data,

    // To each bank
    output logic b_arb_request [NUM_BANKS],
    output logic b_arb_read [NUM_BANKS],
    output logic b_arb_write [NUM_BANKS],
    output logic [DATA_W-1:0] b_arb_writedata [NUM_BANKS],
    output logic [BURSTCOUNT_W-1:0] b_arb_burstcount [NUM_BANKS],
    output logic [ADDRESS_W-$clog2(NUM_BANKS)-1:0] b_arb_address [NUM_BANKS],
    output logic [BYTEENA_W-1:0] b_arb_byteenable [NUM_BANKS],

    input logic b_arb_stall [NUM_BANKS],

    input logic b_wrp_ack [NUM_BANKS],  // MUST BE USED OR WILL HANG! Set to 1

    input logic b_rrp_datavalid [NUM_BANKS],
    input logic [DATA_W-1:0] b_rrp_data [NUM_BANKS]
);

    // Depth of each per-bank read data FIFO.
    localparam READ_DATA_FIFO_DEPTH = BANK_MAX_PENDING_READS;

    // Depth of the bank-select FIFO: the larger of NUM_BANKS and the data FIFO depth.
    localparam READ_BANK_SELECT_FIFO_DEPTH = NUM_BANKS > READ_DATA_FIFO_DEPTH ? NUM_BANKS : READ_DATA_FIFO_DEPTH;

    // +1 : do not stall when pipeline is full
    localparam WRITE_ACK_FIFO_DEPTH = BANK_MAX_PENDING_WRITES * NUM_BANKS + 1;

  // synchronize resetn input
  // aclrn is used as the asynchronous reset in the sensitivity lists below and
  // sclrn as a synchronous clear inside the clocked branches; the FIFOs are
  // reset with resetn_synchronized. Which of aclrn/sclrn is actually active is
  // decided inside acl_reset_handler (not visible here).
  logic aclrn;
  logic sclrn;
  logic resetn_synchronized; 
  acl_reset_handler #(
    .ASYNC_RESET           (ASYNC_RESET),
    .USE_SYNCHRONIZER      (SYNCHRONIZE_RESET),
    .SYNCHRONIZE_ACLRN     (0),
    .PIPE_DEPTH            (2),
    .NUM_COPIES            (1)
  ) acl_reset_handler_inst (
    .clk                   (clock),
    .i_resetn              (resetn),
    .o_aclrn               (aclrn),
    .o_sclrn               (sclrn),
    .o_resetn_synchronized (resetn_synchronized)
  );

    // Shared generate loop variable (used by the per-bank loop in scope "rrp").
    genvar i;

    // One-hot bank select and within-bank address decoded from m_arb_address.
    logic [NUM_BANKS-1:0] m_arb_bank_sel;
    logic [ADDRESS_W-$clog2(NUM_BANKS)-1:0] m_arb_bank_address;

    // Split address into bank_sel and bank_address (within a bank)
    acl_iface_address_to_bankaddress #(
      .ADDRESS_W( ADDRESS_W ),
      .NUM_BANKS( NUM_BANKS ),
      .BANK_SEL_BIT( BANK_SEL_BIT))
      a2b (
        .address(m_arb_address),
        .bank_sel_1hot(m_arb_bank_sel),
        .bank_address(m_arb_bank_address));

    // Request.
    // Forwards the host request to the selected bank and collects that bank's
    // stall into req.stall.
    generate
    begin:req
        integer req_b;
        logic stall;

        always_comb
        begin
            stall = 1'b0;

            for( req_b = 0; req_b < NUM_BANKS; req_b = req_b + 1 )
            begin:bank
                // Control signals reach only the selected bank, and only while
                // neither return path is stalling the host.
                b_arb_request[req_b] = m_arb_request & m_arb_bank_sel[req_b] & ~(rrp.stall | wrp.stall);
                b_arb_read[req_b] = m_arb_read & m_arb_bank_sel[req_b] & ~(rrp.stall | wrp.stall);
                b_arb_write[req_b] = m_arb_write & m_arb_bank_sel[req_b] & ~(rrp.stall | wrp.stall);
                // Payload signals are broadcast unqualified to every bank.
                b_arb_writedata[req_b] = m_arb_writedata;
                b_arb_burstcount[req_b] = m_arb_burstcount;
                b_arb_address[req_b] = m_arb_bank_address;
                b_arb_byteenable[req_b] = m_arb_byteenable;

                // Only the stall of the currently selected bank is propagated.
                stall |= b_arb_stall[req_b] & m_arb_bank_sel[req_b];
            end
        end
    end
    endgenerate

    // Read return path. Need to handle the two problems:
    //  1) Data is returned in a different bank order than the order in which
    //     the banks were issued.
    //  2) Multiple data words arrive in the same cycle (from different banks).
    generate
    begin:rrp
        integer rrp_b;
        logic stall;

        // bs_*      : bank-select FIFO signals.
        // bank_df_* : per-bank data FIFO status/data, indexed by bank.
        logic [NUM_BANKS-1:0] bs_in, bs_out, bank_df_valid, bank_df_no_free;
        logic [BURSTCOUNT_W-1:0] bs_burstcount, bs_burstcounter;
        logic bs_read, bs_write, bs_full, bs_empty, bs_valid, bs_doneburst;
        logic [DATA_W-1:0] bank_df_out [NUM_BANKS];

        // Bank select FIFO. Tracks which bank the next valid read data
        // should come from. Data is assumed to be one-hot encoded.
        // Each entry also carries the burst count of the read. i_stall is
        // released only when the last word of the head entry's burst is read,
        // so (assuming hld_fifo advances when i_stall is low) one entry serves
        // a whole burst.
        hld_fifo #(
          .STYLE("hs"),
          .DEPTH(READ_BANK_SELECT_FIFO_DEPTH),
          .WIDTH(BURSTCOUNT_W+NUM_BANKS),
          .SYNCHRONIZE_RESET(0)
        )
        bs_fifo (
            .clock    (clock),
            .resetn   (resetn_synchronized),
            .i_data   ({m_arb_burstcount,bs_in}),
            .i_valid  (bs_write),
            .o_data   ({bs_burstcount,bs_out}),
            .i_stall  (!(bs_doneburst && bs_read)),
            .o_empty  (bs_empty),
            .o_stall  (bs_full)
        );

        // Counts the words read for the burst at the head of the bank-select
        // FIFO. Starts at 1, increments on every bs_read, and returns to 1 once
        // the count matches the stored burst count.
        assign bs_doneburst = (bs_burstcounter == bs_burstcount);
        always @(posedge clock or negedge aclrn)
          if (!aclrn)
            bs_burstcounter <= 'b01;
          else
          begin
            if (bs_doneburst && bs_read) bs_burstcounter <= 'b01;
            else if (bs_read) bs_burstcounter <= bs_burstcounter + 'b01;
            // Synchronous clear is assigned last so that it overrides the updates above.
            if(!sclrn) bs_burstcounter <= 'b01;
          end

        // Per-bank logic.
        for( i = 0; i < NUM_BANKS; i = i + 1 )
        begin:bank
            // Data FIFO.
            // The FIFO stalls (using the o_almost_full signal) when the available
            // space drops below one max burst. One max burst size is 2**(BURSTCOUNT_W-1) words,
            // so the o_almost_full will be asserted if the remaining FIFO space is
            // 2**(BURSTCOUNT_W-1) or less.
            // NOTE(review): df_full is connected but never read in this module;
            // new reads are held off through o_almost_full instead.
            logic [DATA_W-1:0] df_in, df_out;
            logic df_read, df_write, df_full, df_empty;
            logic o_almost_full;

            hld_fifo #(
              .STYLE("hs"),
              .DEPTH(READ_DATA_FIFO_DEPTH),
              .WIDTH(DATA_W),
              .ALMOST_FULL_CUTOFF(2**(BURSTCOUNT_W-1)),
              .SYNCHRONIZE_RESET(0)
            )
            data_fifo (
              .clock        (clock),
              .resetn       (resetn_synchronized),
              .i_data       (df_in),
              .i_valid      (df_write),
              .o_data       (df_out),
              .i_stall      (!df_read),
              .o_empty      (df_empty),
              .o_stall      (df_full),
              .o_almost_full(o_almost_full)
              );

            // Data FIFO assignments.
            // Every valid word returned by the bank is written; a word is read
            // only when this bank is at the head of the bank-select FIFO and
            // has data available.
            assign df_in = b_rrp_data[i];
            assign df_write = b_rrp_datavalid[i];
            assign df_read = bs_valid & bs_out[i] & bank_df_valid[i];

            assign bank_df_valid[i] = ~df_empty;
            assign bank_df_out[i] = df_out;

            assign bank_df_no_free[i] = o_almost_full; // Stall when the available space drops below one max burst.
        end

        // Bank select FIFO assignments.
        // An entry is written for every read that is stalled neither by the
        // request path nor by this path; a word is read whenever the bank at
        // the head of the FIFO has data available.
        assign bs_in = m_arb_bank_sel;
        assign bs_write = m_arb_read & ~(req.stall | stall);
        assign bs_read = bs_valid & |(bs_out & bank_df_valid);
        assign bs_valid = ~bs_empty;

        // Stall the current read request if the bank select FIFO is full or
        // if the bank data FIFO has no free entries.
        assign stall = m_arb_read & (bs_full | |(bs_in & bank_df_no_free));

        // RRP output signals.
        // rrp_data is the data FIFO output of the bank selected by bs_out,
        // built as an OR of the masked per-bank outputs.
        logic [DATA_W-1:0] rrp_data;
        always_comb
        begin
            rrp_data = '0;

            for( rrp_b = 0; rrp_b < NUM_BANKS; rrp_b = rrp_b + 1 )
                rrp_data |= bs_out[rrp_b] ? bank_df_out[rrp_b] : '0;
        end

        // Valid is bs_read delayed by one register stage.
        always @(posedge clock or negedge aclrn)
          if( !aclrn ) m_rrp_datavalid <= 1'b0;
          else
          begin
            m_rrp_datavalid <= bs_read;     
            // Synchronous clear is assigned last so that it overrides the update above.
            if(!sclrn) m_rrp_datavalid <= 1'b0;
          end

        // Data is registered in the same stage as the valid; it has no reset.
        always @( posedge clock )
            m_rrp_data <= rrp_data;
    end
    endgenerate

    // Write return path. Need to handle one problem:
    //  1) Multiple write acks arrive in the same cycle (from different banks).
    generate
    begin:wrp
        integer wrp_b;
        logic stall;

        // "FIFO" of acks to send out. This is just a counter that counts
        // the number of wrp acks still left to send out, minus one (so a value
        // of -1 indicates no wrp acks left to send out). This allows for
        // a single bit to mean zero.
        //
        // The range of values stored by this counter is 
        // [-1, WRITE_ACK_FIFO_DEPTH - 1]. Initial value is -1.
        logic [$clog2(WRITE_ACK_FIFO_DEPTH):0] ack_counter;
        logic decr_ack_counter;
        logic [$clog2(NUM_BANKS + 1)-1:0] ack_counter_incr;
        logic has_acks;

        // Counter to track the number of free entries in the ack "FIFO",
        // minus one (so a value of -1 indicates no more free entries). This
        // allows for a single bit to mean no more free entries.
        //
        // The range of values stored by this counter is
        // [-1, WRITE_ACK_FIFO_DEPTH - 1]. Initial value is
        // WRITE_ACK_FIFO_DEPTH - 1.
        logic [$clog2(WRITE_ACK_FIFO_DEPTH):0] ack_free;
        logic incr_ack_free, decr_ack_free;
        logic ack_no_free;

        // Logic for ack counter.
        // Every cycle: add the number of bank acks received and subtract one
        // if an ack is being sent to the host.
        always @( posedge clock or negedge aclrn )
          if( !aclrn ) ack_counter <= '1;  // -1
          else
          begin
            ack_counter <= ack_counter + ack_counter_incr - decr_ack_counter;
            // Synchronous clear is assigned last so that it overrides the update above.
            if(!sclrn) ack_counter <= '1;  // -1
          end

        // The counter's top bit acts as the sign bit: when it is clear the
        // counter is non-negative, i.e. at least one ack is pending, and one
        // ack is sent (and counted off) in that cycle.
        assign decr_ack_counter = has_acks;
        assign has_acks = ~ack_counter[$bits(ack_counter) - 1];

        always_comb
        begin
            // In any given cycle, each bank can assert its wrp ack signal
            // and so the ack counter can increase by up to NUM_BANKS.
            ack_counter_incr = '0;

            for( wrp_b = 0; wrp_b < NUM_BANKS; wrp_b = wrp_b + 1 )
                ack_counter_incr += b_wrp_ack[wrp_b];
        end

        // Logic for free entries counter.
        // Every cycle: add one if an ack is sent to the host and subtract one
        // if a write is accepted.
        always @( posedge clock or negedge aclrn )
          if(!aclrn) ack_free <= WRITE_ACK_FIFO_DEPTH - 1;
          else
          begin
            ack_free <= ack_free + incr_ack_free - decr_ack_free;
            // Synchronous clear is assigned last so that it overrides the update above.
            if(!sclrn) ack_free <= WRITE_ACK_FIFO_DEPTH - 1;
          end

        // A free entry is returned whenever an ack is sent to the host and is
        // consumed whenever a write is accepted (not stalled by the request
        // path and free entries remain). The top bit of ack_free set means the
        // counter is at -1, i.e. no free entries.
        assign incr_ack_free = decr_ack_counter;
        assign decr_ack_free = m_arb_write & ~(req.stall | ack_no_free);
        assign ack_no_free = ack_free[$bits(ack_free) - 1];

        // Stall if the current request is a write and the write ack fifo has
        // no more free entries.
        assign stall = m_arb_write & ack_no_free;

        // Wrp ack signal.
        assign m_wrp_ack = has_acks;
    end
    endgenerate

    // Stall signal.
    // The host is stalled if the selected bank, the read return path or the
    // write return path is stalling.
    assign m_arb_stall = req.stall | rrp.stall | wrp.stall;

endmodule

