1. Title
RISC-V CPU Multicycle 구현
2. Category
"SystemVerilog", "CPU", "RISC-V"
3. Key Concepts
Single Cycle RISC-V CPU에서 Flip Flop으로 한 명령어를 여러 단계로 나눈 후, 컨트롤 로직에 FSM을 추가해 Multi Cycle을 구현한다.
멀티 사이클
1) 한 명령어를 여러 단계(IF, ID, EXE, MEM, WB)로 나누어 여러 클록 사이클에 걸쳐 실행한다.
2) 각 단계가 한 클록 사이클에 동작하므로, 싱글 사이클에 비해 클록 주기를 줄일 수 있다.
3) 명령어마다 필요한 단계 수가 다르므로, 서로 다른 CPI(Cycles Per Instruction)를 갖게 된다.
4. BLOCK DIAGRAM / FSM
5. Code Review
`timescale 1ns / 1ps
// RV32I_Core: top level of the CPU core. It only wires the control unit to the
// datapath and exposes the instruction-memory and data-memory interfaces.
module RV32I_Core (
    input  logic        clk,
    input  logic        reset,
    // Instruction memory interface (ROM)
    input  logic [31:0] instr_data,
    output logic [31:0] instr_addr,
    // Data memory interface (RAM)
    output logic        data_mem_we,
    output logic [31:0] data_mem_addr,
    output logic [31:0] data_mem_wdata,
    input  logic [31:0] data_mem_rdata
);

    // Control signals driven by the control unit and consumed by the datapath.
    logic [3:0] alu_ctrl;
    logic       reg_file_we;
    logic       alu_src_sel;
    logic       wb_mux_sel;
    logic       branch_en;
    logic       jump_sel;
    logic       jalr_sel;
    logic       PC_en;

    // Control unit: decodes instr_data and sequences the control signals.
    // data_mem_we goes straight to the data-memory interface.
    ControlUnit U_ControlUnit (
        .clk        (clk),
        .reset      (reset),
        .instr_data (instr_data),
        .reg_file_we(reg_file_we),
        .alu_ctrl   (alu_ctrl),
        .alu_src_sel(alu_src_sel),
        .wb_mux_sel (wb_mux_sel),
        .data_mem_we(data_mem_we),
        .branch_en  (branch_en),
        .jump_sel   (jump_sel),
        .jalr_sel   (jalr_sel),
        .PC_en      (PC_en)
    );

    // Datapath: drives the instruction address and the data-memory
    // address / write data.
    DataPath U_DataPath (
        .clk           (clk),
        .reset         (reset),
        .instr_data    (instr_data),
        .instr_addr    (instr_addr),
        .reg_file_we   (reg_file_we),
        .alu_ctrl      (alu_ctrl),
        .alu_src_sel   (alu_src_sel),
        .wb_mux_sel    (wb_mux_sel),
        .branch_en     (branch_en),
        .jump_sel      (jump_sel),
        .jalr_sel      (jalr_sel),
        .PC_en         (PC_en),
        .data_mem_addr (data_mem_addr),
        .data_mem_wdata(data_mem_wdata),
        .data_mem_rdata(data_mem_rdata)
    );

endmodule
`timescale 1ns/1ps
`include "../../include/defines.sv"
// ControlUnit: multicycle control for the RV32I core.
//
// An FSM steps each instruction through FETCH -> DECODE -> one or more
// execute/memory/write-back states chosen by the opcode, and drives one
// 8-bit control word per state. The ALU operation is decoded combinationally
// from the opcode and function bits of instr_data.
//
// The `OP_TYPE_* opcode macros and the `ADD / `SRA / `LUI / `AUIPC ALU codes
// come from the defines.sv file included above this module.
module ControlUnit (
    input  logic        clk,
    input  logic        reset,
    input  logic [31:0] instr_data,
    output logic        reg_file_we,
    output logic [ 3:0] alu_ctrl,
    output logic        alu_src_sel,
    output logic        wb_mux_sel,
    output logic        data_mem_we,
    output logic        branch_en,
    output logic        jump_sel,
    output logic        jalr_sel,
    output logic        PC_en
);

    // Instruction fields used for decoding.
    wire [6:0] opcode = instr_data[6:0];
    wire [2:0] func3  = instr_data[14:12];
    wire [6:0] func7  = instr_data[31:25];

    // Packed control word; bit order is given by the assign below.
    logic [7:0] controls;

    // Explicit 4-bit base type so the state register has a defined width
    // instead of the default 32-bit int.
    typedef enum logic [3:0] {
        FETCH,
        DECODE,
        R_EXE,
        I_EXE,
        L_EXE,
        L_MEM,
        L_WB,
        S_EXE,
        S_MEM,
        B_EXE,
        LU_EXE,
        AU_EXE,
        J_EXE,
        JL_EXE
    } state_e;

    state_e state, state_next;

    assign {reg_file_we, alu_src_sel, data_mem_we, wb_mux_sel,
            branch_en, jump_sel, jalr_sel, PC_en} = controls;

    // State register (asynchronous, active-high reset to FETCH).
    always_ff @(posedge clk, posedge reset) begin
        if (reset) state <= FETCH;
        else       state <= state_next;
    end

    // Next-state logic.
    always_comb begin
        state_next = state;
        case (state)
            FETCH: state_next = DECODE;
            DECODE: begin
                case (opcode)
                    `OP_TYPE_R:  state_next = R_EXE;
                    `OP_TYPE_L:  state_next = L_EXE;
                    `OP_TYPE_I:  state_next = I_EXE;
                    `OP_TYPE_S:  state_next = S_EXE;
                    `OP_TYPE_B:  state_next = B_EXE;
                    `OP_TYPE_LU: state_next = LU_EXE;
                    `OP_TYPE_AU: state_next = AU_EXE;
                    `OP_TYPE_J:  state_next = J_EXE;
                    `OP_TYPE_JL: state_next = JL_EXE;
                    // Unsupported opcode: stay in DECODE. PC_en is 0 there,
                    // so the core stops advancing. This is the original
                    // behaviour, now written out explicitly.
                    default:     state_next = DECODE;
                endcase
            end
            R_EXE:  state_next = FETCH;
            I_EXE:  state_next = FETCH;
            L_EXE:  state_next = L_MEM;
            L_MEM:  state_next = L_WB;
            L_WB:   state_next = FETCH;
            S_EXE:  state_next = S_MEM;
            S_MEM:  state_next = FETCH;
            B_EXE:  state_next = FETCH;
            LU_EXE: state_next = FETCH;
            AU_EXE: state_next = FETCH;
            J_EXE:  state_next = FETCH;
            JL_EXE: state_next = FETCH;
            // The two unused 4-bit encodings recover to FETCH.
            default: state_next = FETCH;
        endcase
    end

    // Control word per FSM state (a function of the state only).
    always_comb begin
        controls = 8'b0000_0000;
        case (state)
            // {reg_file_we, alu_src_sel, data_mem_we, wb_mux_sel,
            //  branch_en, jump_sel, jalr_sel, PC_en}
            FETCH:  controls = 8'b0_0_0_0_0_0_0_1;
            DECODE: controls = 8'b0_0_0_0_0_0_0_0;
            R_EXE:  controls = 8'b1_0_0_1_0_0_0_0;
            L_EXE:  controls = 8'b0_1_0_0_0_0_0_0;
            L_MEM:  controls = 8'b0_1_0_0_0_0_0_0;
            L_WB:   controls = 8'b1_1_0_0_0_0_0_0;
            I_EXE:  controls = 8'b1_1_0_1_0_0_0_0;
            S_EXE:  controls = 8'b0_1_0_0_0_0_0_0;
            S_MEM:  controls = 8'b0_1_1_0_0_0_0_0;
            B_EXE:  controls = 8'b0_0_0_1_1_0_0_0;
            LU_EXE: controls = 8'b1_1_0_1_0_0_0_0;
            AU_EXE: controls = 8'b1_1_0_1_0_0_0_0;
            J_EXE:  controls = 8'b1_0_0_0_1_1_0_0;
            JL_EXE: controls = 8'b1_0_0_0_1_1_1_0;
            default: controls = 8'b0000_0000;
        endcase
    end

    // ALU operation, decoded from opcode and func3 / func7.
    always_comb begin
        alu_ctrl = 4'b0000;
        case (opcode)
            `OP_TYPE_R: alu_ctrl = {func7[5], func3};
            `OP_TYPE_L: alu_ctrl = `ADD;  // address = rs1 + imm
            `OP_TYPE_I: begin
                // Only the func3 = 101 shift uses instr[30] as a function
                // bit (SRAI). For every other I-type instruction that bit
                // belongs to the immediate.
                if ({func7[5], func3} == 4'b1101) alu_ctrl = `SRA;
                else                              alu_ctrl = {1'b0, func3};
            end
            `OP_TYPE_S: alu_ctrl = `ADD;  // address = rs1 + imm
            // B-type has no funct7 field: instr[30] is immediate bit imm[10].
            // Using it here made the ALU opcode depend on the branch offset,
            // so only func3 selects the comparison.
            `OP_TYPE_B:  alu_ctrl = {1'b0, func3};
            `OP_TYPE_LU: alu_ctrl = `LUI;
            `OP_TYPE_AU: alu_ctrl = `AUIPC;
            // NOTE(review): for JAL/JALR these bits are immediate bits, not
            // function bits. Kept unchanged; the ALU result does not appear
            // to be consumed for jumps - confirm against the datapath/ALU.
            `OP_TYPE_J,
            `OP_TYPE_JL: alu_ctrl = {func7[5], func3};
            default:     alu_ctrl = 4'bx;  // undefined opcode
        endcase
    end

endmodule
`timescale 1ns/1ps
// DataPath: multicycle RV32I datapath.
//
// Structural netlist only: program counter, immediate extender, register
// file, ALU, PC-target adders, write-back mux, load/store data shaping, and
// the registers that hold values between cycles. Sub-module behaviour is
// defined elsewhere; comments here describe the wiring only.
module DataPath (
    input  logic        clk,
    input  logic        reset,
    input  logic [31:0] instr_data,
    output logic [31:0] instr_addr,
    input  logic        reg_file_we,
    input  logic [ 3:0] alu_ctrl,
    input  logic        alu_src_sel,
    input  logic        wb_mux_sel,
    input  logic        branch_en,
    input  logic        jump_sel,
    input  logic        jalr_sel,
    input  logic        PC_en,
    output logic [31:0] data_mem_addr,
    output logic [31:0] data_mem_wdata,
    input  logic [31:0] data_mem_rdata
);

    // Combinational nets
    logic [31:0] rfRead1, rfRead2;      // register-file read ports
    logic [31:0] immExt;                // extender output
    logic [31:0] aluOperandB;           // ALU "b" input after the source mux
    logic [31:0] aluOut;                // ALU result
    logic [31:0] pcBase, pcOffset;      // operands of the next-PC adder
    logic [31:0] pcNext;                // next-PC adder output
    logic [31:0] pcPlus4;               // instr_addr + 4
    logic [31:0] loadData;              // LoadDataProcessor output
    logic [31:0] storeData;             // length_sel output
    logic [31:0] wbData;                // register-file write data
    logic        aluBranchFlag;         // branch_flag output of the ALU
    logic        takeOffset;            // selects immediate vs. +4 as PC offset

    // Outputs of the inter-cycle registers
    logic [31:0] decRs1, decRs2, decImm;
    logic [31:0] exeAluOut, exePcNext;
    logic [31:0] memRdata;

    wire [2:0] funct3 = instr_data[14:12];

    // The data-memory address is the registered ALU result.
    assign data_mem_addr = exeAluOut;

    // Use the immediate as PC offset when the ALU flags a branch while
    // branch_en is set, or when opcode bits 6 and 2 are both 1.
    assign takeOffset = (instr_data[6] & instr_data[2]) | (branch_en & aluBranchFlag);

    // ---------------------------------------------------------------------
    // Program counter
    // ---------------------------------------------------------------------
    register_en U_PC (
        .clk  (clk),
        .reset(reset),
        .PCen (PC_en),
        .d    (exePcNext),
        .q    (instr_addr)
    );

    // ---------------------------------------------------------------------
    // Decode: immediate extension and register file
    // ---------------------------------------------------------------------
    extend U_Extend (
        .instr_data(instr_data),
        .immExt    (immExt)
    );

    RegisterFile U_RegFile (
        .clk   (clk),
        .we    (reg_file_we),
        .RAddr1(instr_data[19:15]),
        .RAddr2(instr_data[24:20]),
        .WAddr (instr_data[11:7]),
        .WData (wbData),
        .RData1(rfRead1),
        .RData2(rfRead2)
    );

    register U_DecReg_RF1 (
        .clk  (clk),
        .reset(reset),
        .d    (rfRead1),
        .q    (decRs1)
    );

    register U_DecReg_RF2 (
        .clk  (clk),
        .reset(reset),
        .d    (rfRead2),
        .q    (decRs2)
    );

    register U_DecReg_Ext (
        .clk  (clk),
        .reset(reset),
        .d    (immExt),
        .q    (decImm)
    );

    // ---------------------------------------------------------------------
    // Execute: ALU and its operand mux
    // ---------------------------------------------------------------------
    mux_2x1 U_AluSrcMux (
        .sel(alu_src_sel),
        .x0 (decRs2),
        .x1 (decImm),
        .y  (aluOperandB)
    );

    alu U_ALU (
        .alu_ctrl   (alu_ctrl),
        .a          (decRs1),
        .b          (aluOperandB),
        .pc_input   (instr_addr),
        .branch_flag(aluBranchFlag),
        .result     (aluOut)
    );

    register U_ExeReg_ALU (
        .clk  (clk),
        .reset(reset),
        .d    (aluOut),
        .q    (exeAluOut)
    );

    // ---------------------------------------------------------------------
    // Next-PC computation: (instr_addr or rs1) + (4 or immediate)
    // ---------------------------------------------------------------------
    mux_2x1 U_branch_en_Mux (
        .sel(takeOffset),
        .x0 (32'd4),
        .x1 (decImm),
        .y  (pcOffset)
    );

    mux_2x1 U_JAL_Mux (
        .sel(jalr_sel),
        .x0 (instr_addr),
        .x1 (decRs1),
        .y  (pcBase)
    );

    adder U_Adder_PC (
        .a(pcBase),
        .b(pcOffset),
        .y(pcNext)
    );

    register U_ExeReg_BranchMux (
        .clk  (clk),
        .reset(reset),
        .d    (pcNext),
        .q    (exePcNext)
    );

    // instr_addr + 4, fed to the write-back mux.
    adder U_J_Adder_PC (
        .a(instr_addr),
        .b(32'd4),
        .y(pcPlus4)
    );

    // ---------------------------------------------------------------------
    // Memory: load-data and store-data shaping
    // ---------------------------------------------------------------------
    register U_MemReg_rData (
        .clk  (clk),
        .reset(reset),
        .d    (data_mem_rdata),
        .q    (memRdata)
    );

    LoadDataProcessor U_LoadDataProcessor (
        .func3         (funct3),
        .data_mem_rdata(memRdata),
        .processed_data(loadData)
    );

    length_sel U_LENGTH_SEL (
        .data     (decRs2),
        .ram_data (loadData),
        .func3    (funct3),
        .size_data(storeData)
    );

    register U_ExeReg_Wdata (
        .clk  (clk),
        .reset(reset),
        .d    (storeData),
        .q    (data_mem_wdata)
    );

    // ---------------------------------------------------------------------
    // Write-back: select among load data, ALU result and instr_addr + 4
    // ---------------------------------------------------------------------
    mux_3x1 U_wrMux (
        .sel({jump_sel, wb_mux_sel}),
        .x0 (loadData),
        .x1 (aluOut),
        .x2 (pcPlus4),
        .y  (wbData)
    );

endmodule
6. Testing and Debugging
* Testing Tools: VIVADO, Modelsim, COMPORTMASTER
* Bubble Sort Program을 ROM에 넣어 Testbench 진행
CPU Course Review - 10. Analysis of Assembly Language (Single Cycle RISC-V)
1. Title: Analysis of Assembly Language through RISC-V CPU / 2. Category: "Assembly", "Machine Language" / 3. Key Concepts: C언어로 작성한 Bubble Sort Program이 어떻게 기계어로 번역되고 실행되는지, Assembly 언어 분석을 통해 확인한다.
salmon1113.tistory.com
`timescale 1ns / 1ps
// tb_MCU: minimal testbench that clocks and resets the MCU under test.
// It has no stimulus beyond clock and reset and never calls $finish, so the
// simulation run time is controlled by the simulator.
module tb_MCU ();

    logic clk;
    logic reset;

    MCU DUT (
        .clk  (clk),
        .reset(reset)
    );

    // Free-running clock, 10 ns period, starting low.
    initial clk = 1'b0;
    always #5 clk = ~clk;

    // Hold reset across two rising edges, then release it on a falling edge.
    // Releasing at t = 5 coincided with the first rising clock edge, so
    // whether that edge saw reset asserted depended on simulator event order.
    initial begin
        reset = 1'b1;
        repeat (2) @(posedge clk);
        @(negedge clk);
        reset = 1'b0;
    end

endmodule
'PROJECTS > CPU 설계' 카테고리의 다른 글
CPU Course Project - AXI4-Lite & SoC Project (0) | 2025.03.17 |
---|---|
CPU Course Project - RISC-V / AMBA APB (0) | 2025.03.04 |
CPU Course Project - RISC-V (Single Cycle) (0) | 2025.02.18 |
CPU Course Project - Stop Watch Controlled by UART REVISION (0) | 2025.02.09 |