This lesson starts at commit 6304d615b3559db9ac9908467a134b971d5b82c0.

7. Control flow instructions

Let's continue with the control flow instructions. These are instructions that perform a "jump" or "call" and need to change the pc register in the fetch module. To do this, we need to add some signals in the output of the execute module and let the fetch module use them as inputs. We'll add an indicator signal jump that indicates if the pc should be overwritten, and a jump_address vector to pass the new address of the pc register.

src/core.vhd CHANGED Viewed

@@ -20,11 +20,15 @@ architecture rtl of core is
 	signal execute_output: execute_output_t;
 	signal memory_output: memory_output_t;
 	signal pipeline_ready: std_logic;
 	component fetch is
 		port (
 			clk: in std_logic;
 			pipeline_ready: in std_logic;
 			output: out fetch_output_t
 		);
 	end component;
@@ -44,6 +48,8 @@ architecture rtl of core is
 			clk: in std_logic;
 			input: in decode_output_t;
 			output: out execute_output_t;
 			led: out std_logic_vector(7 downto 0)
 		);
 	end component;
@@ -57,11 +63,11 @@ architecture rtl of core is
 	end component;
 begin
-	fetch_inst: fetch port map(clk => clk, output => fetch_output, pipeline_ready => pipeline_ready);
 	decode_write_inst: decode_write port map(clk => clk, decode_input => fetch_output, decode_output => decode_output, write_input => memory_output, pipeline_ready => pipeline_ready);
-	execute_inst: execute port map(clk => clk, input => decode_output, output => execute_output, led => led);
 	memory_inst: memory port map(clk => clk, input => execute_output, output => memory_output);

 	signal execute_output: execute_output_t;
 	signal memory_output: memory_output_t;
 	signal pipeline_ready: std_logic;
+	signal jump: std_logic;
+	signal jump_address: std_logic_vector(31 downto 0);
 	component fetch is
 		port (
 			clk: in std_logic;
 			pipeline_ready: in std_logic;
+			jump: in std_logic;
+			jump_address: in std_logic_vector(31 downto 0);
 			output: out fetch_output_t
 		);
 	end component;
 			clk: in std_logic;
 			input: in decode_output_t;
 			output: out execute_output_t;
+			jump: out std_logic := '0';
+			jump_address: out std_logic_vector(31 downto 0);
 			led: out std_logic_vector(7 downto 0)
 		);
 	end component;
 	end component;
 begin
+	fetch_inst: fetch port map(clk => clk, pipeline_ready => pipeline_ready, jump => jump, jump_address => jump_address, output => fetch_output);
 	decode_write_inst: decode_write port map(clk => clk, decode_input => fetch_output, decode_output => decode_output, write_input => memory_output, pipeline_ready => pipeline_ready);
+	execute_inst: execute port map(clk => clk, input => decode_output, output => execute_output, jump => jump, jump_address => jump_address, led => led);
 	memory_inst: memory port map(clk => clk, input => execute_output, output => memory_output);

src/core/execute.vhd CHANGED Viewed

@@ -11,6 +11,8 @@ entity execute is
 		clk: in std_logic;
 		input: in decode_output_t;
 		output: out execute_output_t := DEFAULT_EXECUTE_OUTPUT;
 		led: out std_logic_vector(7 downto 0) := (others => '0')
 	);
 end execute;

 		clk: in std_logic;
 		input: in decode_output_t;
 		output: out execute_output_t := DEFAULT_EXECUTE_OUTPUT;
+		jump: out std_logic := '0';
+		jump_address: out std_logic_vector(31 downto 0) := (others => '0');
 		led: out std_logic_vector(7 downto 0) := (others => '0')
 	);
 end execute;

src/core/fetch.vhd CHANGED Viewed

@@ -10,6 +10,8 @@ entity fetch is
 	port (
 		clk: in std_logic;
 		pipeline_ready: in std_logic;
 		output: out fetch_output_t := DEFAULT_FETCH_OUTPUT
 	);
 end fetch;

 	port (
 		clk: in std_logic;
 		pipeline_ready: in std_logic;
+		jump: in std_logic;
+		jump_address: in std_logic_vector(31 downto 0);
 		output: out fetch_output_t := DEFAULT_FETCH_OUTPUT
 	);
 end fetch;

Now, we want to use them in the fetch module.

src/core/fetch.vhd CHANGED Viewed

@@ -37,6 +37,8 @@ begin
 				output.is_active <= '1';
 				output.instr <= imem(to_integer(pc(5 downto 2)));
 				output.pc <= std_logic_vector(pc);
 			else
 				output <= DEFAULT_FETCH_OUTPUT;
 			end if;

 				output.is_active <= '1';
 				output.instr <= imem(to_integer(pc(5 downto 2)));
 				output.pc <= std_logic_vector(pc);
+			elsif jump = '1' then
+				pc <= unsigned(jump_address);
 			else
 				output <= DEFAULT_FETCH_OUTPUT;
 			end if;

We don't support fetching and jumping at the same time; I'll add an assertion to check this.

src/core/fetch.vhd CHANGED Viewed

@@ -37,6 +37,8 @@ begin
 				output.is_active <= '1';
 				output.instr <= imem(to_integer(pc(5 downto 2)));
 				output.pc <= std_logic_vector(pc);
 			elsif jump = '1' then
 				pc <= unsigned(jump_address);
 			else

 				output.is_active <= '1';
 				output.instr <= imem(to_integer(pc(5 downto 2)));
 				output.pc <= std_logic_vector(pc);
+				assert jump = '0' report "Fetching and jumping at the same cycle is not supported";
 			elsif jump = '1' then
 				pc <= unsigned(jump_address);
 			else

Now, we want to implement the JAL and JALR instructions. The docs say this about them:

The jump and link (JAL) instruction uses the J-type format, where the J-immediate encodes a signed offset in multiples of 2 bytes. The offset is sign-extended and added to the address of the jump instruction to form the jump target address. Jumps can therefore target a ±1 MiB range. JAL stores the address of the instruction following the jump ('pc'+4) into register rd.

The indirect jump instruction JALR (jump and link register) uses the I-type encoding. The target address is obtained by adding the sign-extended 12-bit I-immediate to the register rs1, then setting the least-significant bit of the result to zero. The address of the instruction following the jump (pc+4) is written to register rd.

Note that for the JALR instruction we'll actually need three operands:

The immediate
The value of the rs1 register
The value that will be stored in the destination register

So, we'll add an operand to the output of the decode stage, which we'll use to pass the value that should be stored in the destination register.

src/core/constants.vhd CHANGED Viewed

@@ -17,6 +17,7 @@ package core_constants is
 		operation => OP_ADD,
 		operand1 => (others => '0'),
 		operand2 => (others => '0'),
 		destination_reg => (others => '0')
 	);

 		operation => OP_ADD,
 		operand1 => (others => '0'),
 		operand2 => (others => '0'),
+		operand3 => (others => '0'),
 		destination_reg => (others => '0')
 	);

src/core/types.vhd CHANGED Viewed

@@ -17,6 +17,7 @@ package core_types is
 		operation: operation_t;
 		operand1: std_logic_vector(31 downto 0);
 		operand2: std_logic_vector(31 downto 0);
 		destination_reg: std_logic_vector(4 downto 0);
 	end record decode_output_t;

 		operation: operation_t;
 		operand1: std_logic_vector(31 downto 0);
 		operand2: std_logic_vector(31 downto 0);
+		operand3: std_logic_vector(31 downto 0);
 		destination_reg: std_logic_vector(4 downto 0);
 	end record decode_output_t;

Now we're ready to decode the JAL and JALR instructions.

src/core/decode_write.vhd CHANGED Viewed

@@ -66,6 +66,7 @@ begin
 			-- sign extension
 			b_imm_s := std_logic_vector(resize(signed(b_imm), 32));
 			i_imm_s := std_logic_vector(resize(signed(i_imm), 32));
 			v_decode_output := DEFAULT_DECODE_OUTPUT;
@@ -86,9 +87,19 @@ begin
 					v_decode_output.operand2 := u_imm;
 					v_decode_output.destination_reg := rd;
 				elsif opcode = "1101111" then
-					-- TODO: JAL
 				elsif opcode = "1100111" and funct3 = "000" then
-					-- TODO: JALR
 				elsif opcode = "1100011" then
 					if funct3 = "000" then
 						-- TODO: BEQ

 			-- sign extension
 			b_imm_s := std_logic_vector(resize(signed(b_imm), 32));
 			i_imm_s := std_logic_vector(resize(signed(i_imm), 32));
+			j_imm_s := std_logic_vector(resize(signed(j_imm), 32));
 			v_decode_output := DEFAULT_DECODE_OUTPUT;
 					v_decode_output.operand2 := u_imm;
 					v_decode_output.destination_reg := rd;
 				elsif opcode = "1101111" then
+					-- JAL
+					v_decode_output.operation := OP_JAL;
+					v_decode_output.operand1 := decode_input.pc;
+					v_decode_output.operand2 := j_imm_s;
+					v_decode_output.operand3 := std_logic_vector(unsigned(decode_input.pc) + 4);
+					v_decode_output.destination_reg := rd;
 				elsif opcode = "1100111" and funct3 = "000" then
+					-- JALR
+					v_decode_output.operation := OP_JAL;
+					v_decode_output.operand1 := reg(to_integer(unsigned(rs1)));
+					v_decode_output.operand2 := i_imm_s;
+					v_decode_output.operand3 := std_logic_vector(unsigned(decode_input.pc) + 4);
+					v_decode_output.destination_reg := rd;
 				elsif opcode = "1100011" then
 					if funct3 = "000" then
 						-- TODO: BEQ

We just need to implement OP_JAL, which is the operation that both JAL and JALR decode to. It should set result to operand 3 (pc + 4), add operands 1 and 2 to form the jump target, set the LSB of that target address to zero, and jump to it.

src/core/execute.vhd CHANGED Viewed

@@ -24,11 +24,15 @@ begin
 	process (clk)
 		variable v_output: execute_output_t;
 		variable v_sign: std_logic_vector(31 downto 0);
 	begin
 		if rising_edge(clk) then
 			v_output := DEFAULT_EXECUTE_OUTPUT;
 			v_output.is_active := input.is_active;
 			if input.is_active = '1' and input.is_invalid = '0' then
 				if input.operation = OP_ADD then
@@ -95,6 +99,10 @@ begin
 					if input.operand2(0) = '1' then
 						v_output.result := v_sign(1 downto 0) & v_output.result(31 downto 2);
 					end if;
 				elsif input.operation = OP_LED then
 					led <= input.operand1(7 downto 0);
 				else
@@ -104,6 +112,9 @@ begin
 				v_output.destination_reg := input.destination_reg;
 			end if;
 			output <= v_output;
 		end if;
 	end process;

 	process (clk)
 		variable v_output: execute_output_t;
 		variable v_sign: std_logic_vector(31 downto 0);
+		variable v_jump: std_logic;
+		variable v_jump_address: std_logic_vector(31 downto 0);
 	begin
 		if rising_edge(clk) then
 			v_output := DEFAULT_EXECUTE_OUTPUT;
 			v_output.is_active := input.is_active;
+			v_jump := '0';
+			v_jump_address := (others => '0');
 			if input.is_active = '1' and input.is_invalid = '0' then
 				if input.operation = OP_ADD then
 					if input.operand2(0) = '1' then
 						v_output.result := v_sign(1 downto 0) & v_output.result(31 downto 2);
 					end if;
+				elsif input.operation = OP_JAL then
+					v_jump := '1';
+					v_jump_address := std_logic_vector(unsigned(input.operand1) + unsigned(input.operand2));
+					v_output.result := input.operand3;
 				elsif input.operation = OP_LED then
 					led <= input.operand1(7 downto 0);
 				else
 				v_output.destination_reg := input.destination_reg;
 			end if;
+			jump <= v_jump;
+			jump_address <= v_jump_address(31 downto 1) & "0";
 			output <= v_output;
 		end if;
 	end process;

src/core/types.vhd CHANGED Viewed

@@ -3,7 +3,7 @@ use ieee.std_logic_1164.all;
 package core_types is
-	type operation_t is (OP_ADD, OP_SLT, OP_SLTU, OP_XOR, OP_OR, OP_AND, OP_SLL, OP_SRL, OP_SRA, OP_SUB, OP_LED);
 	type fetch_output_t is record
 		is_active: std_logic;

 package core_types is
+	type operation_t is (OP_ADD, OP_SLT, OP_SLTU, OP_XOR, OP_OR, OP_AND, OP_SLL, OP_SRL, OP_SRA, OP_SUB, OP_JAL, OP_LED);
 	type fetch_output_t is record
 		is_active: std_logic;

Now, let's decode BEQ, BNE, BLT, BGE, BLTU, BGEU.

src/core/decode_write.vhd CHANGED Viewed

@@ -101,18 +101,28 @@ begin
 					v_decode_output.operand3 := std_logic_vector(unsigned(decode_input.pc) + 4);
 					v_decode_output.destination_reg := rd;
 				elsif opcode = "1100011" then
 					if funct3 = "000" then
-						-- TODO: BEQ
 					elsif funct3 = "001" then
-						-- TODO: BNE
 					elsif funct3 = "100" then
-						-- TODO: BLT
 					elsif funct3 = "101" then
-						-- TODO: BGE
 					elsif funct3 = "110" then
-						-- TODO: BLTU
 					elsif funct3 = "111" then
-						-- TODO: BGEU
 					else
 						v_decode_output.is_invalid := '1';
 					end if;

 					v_decode_output.operand3 := std_logic_vector(unsigned(decode_input.pc) + 4);
 					v_decode_output.destination_reg := rd;
 				elsif opcode = "1100011" then
+					v_decode_output.operand1 := reg(to_integer(unsigned(rs1)));
+					v_decode_output.operand2 := reg(to_integer(unsigned(rs2)));
+					v_decode_output.operand3 := std_logic_vector(unsigned(decode_input.pc) + unsigned(b_imm_s));
 					if funct3 = "000" then
+						-- BEQ
+						v_decode_output.operation := OP_BEQ;
 					elsif funct3 = "001" then
+						-- BNE
+						v_decode_output.operation := OP_BNE;
 					elsif funct3 = "100" then
+						-- BLT
+						v_decode_output.operation := OP_BLT;
 					elsif funct3 = "101" then
+						-- BGE
+						v_decode_output.operation := OP_BGE;
 					elsif funct3 = "110" then
+						-- BLTU
+						v_decode_output.operation := OP_BLTU;
 					elsif funct3 = "111" then
+						-- BGEU
+						v_decode_output.operation := OP_BGEU;
 					else
 						v_decode_output.is_invalid := '1';
 					end if;

src/core/types.vhd CHANGED Viewed

@@ -3,7 +3,7 @@ use ieee.std_logic_1164.all;
 package core_types is
-	type operation_t is (OP_ADD, OP_SLT, OP_SLTU, OP_XOR, OP_OR, OP_AND, OP_SLL, OP_SRL, OP_SRA, OP_SUB, OP_JAL, OP_LED);
 	type fetch_output_t is record
 		is_active: std_logic;

 package core_types is
+	type operation_t is (OP_ADD, OP_SLT, OP_SLTU, OP_XOR, OP_OR, OP_AND, OP_SLL, OP_SRL, OP_SRA, OP_SUB, OP_JAL, OP_BEQ, OP_BNE, OP_BLT, OP_BGE, OP_BLTU, OP_BGEU, OP_LED);
 	type fetch_output_t is record
 		is_active: std_logic;

Then we implement OP_BEQ, OP_BNE, OP_BLT, OP_BGE, OP_BLTU, and OP_BGEU in the execute stage.

src/core/execute.vhd CHANGED Viewed

@@ -103,6 +103,36 @@ begin
 					v_jump := '1';
 					v_jump_address := std_logic_vector(unsigned(input.operand1) + unsigned(input.operand2));
 					v_output.result := input.operand3;
 				elsif input.operation = OP_LED then
 					led <= input.operand1(7 downto 0);
 				else

 					v_jump := '1';
 					v_jump_address := std_logic_vector(unsigned(input.operand1) + unsigned(input.operand2));
 					v_output.result := input.operand3;
+				elsif input.operation = OP_BEQ then
+					if input.operand1 = input.operand2 then
+						v_jump := '1';
+						v_jump_address := input.operand3;
+					end if;
+				elsif input.operation = OP_BNE then
+					if input.operand1 /= input.operand2 then
+						v_jump := '1';
+						v_jump_address := input.operand3;
+					end if;
+				elsif input.operation = OP_BLT then
+					if signed(input.operand1) < signed(input.operand2) then
+						v_jump := '1';
+						v_jump_address := input.operand3;
+					end if;
+				elsif input.operation = OP_BGE then
+					if signed(input.operand1) >= signed(input.operand2) then
+						v_jump := '1';
+						v_jump_address := input.operand3;
+					end if;
+				elsif input.operation = OP_BLTU then
+					if unsigned(input.operand1) < unsigned(input.operand2) then
+						v_jump := '1';
+						v_jump_address := input.operand3;
+					end if;
+				elsif input.operation = OP_BGEU then
+					if unsigned(input.operand1) >= unsigned(input.operand2) then
+						v_jump := '1';
+						v_jump_address := input.operand3;
+					end if;
 				elsif input.operation = OP_LED then
 					led <= input.operand1(7 downto 0);
 				else

Now that we have a jump instruction, we don't need our custom HANG instruction anymore. Instead, we can just write:

hang:
j hang

This is a "pseudoinstruction" that you can think of as syntactic sugar for a JAL instruction with x0 as the destination register and an offset of 0. So, the instruction jumps to itself, effectively hanging the CPU.

src/core/decode_write.vhd CHANGED Viewed

@@ -246,9 +246,6 @@ begin
 					v_decode_output.operand1 := reg(to_integer(unsigned(rs1)));
 					v_decode_output.operand2 := (others => '0');
 					v_decode_output.destination_reg := (others => '0');
-				elsif opcode = "1111111" and funct3 = "001" then
-					-- HANG (custom instruction): stops execution of the CPU
-					v_decode_output := DEFAULT_DECODE_OUTPUT;
 				else
 					v_decode_output.is_invalid := '1';
 				end if;

 					v_decode_output.operand1 := reg(to_integer(unsigned(rs1)));
 					v_decode_output.operand2 := (others => '0');
 					v_decode_output.destination_reg := (others => '0');
 				else
 					v_decode_output.is_invalid := '1';
 				end if;

That's all the work on the CPU for this lesson.

As a sanity check, I wrote this cute little program in RISC-V assembly to calculate Fibonacci numbers again.

# x3 stores the number of
# iterations we still have to do
li x3, 10

# init x1, x2 to F0, F-1
li x2, 1

loop:

# do two iterations
add x2, x1, x2
add x1, x1, x2

# decrease x3
addi x3, x3, -2

# loop if we're not done yet
bgt x3, x0, loop

# if the number of iterations is zero
beq x3, x0, end

# otherwise, x3 equals -1 and x1 has
# the next Fibonacci number, so we get
# the previous one which is stored in x2
mv x1, x2

end:
j end

We can assemble this with an online RISC-V assembler and put the resulting machine code into our instruction memory.

src/core/fetch.vhd CHANGED Viewed

@@ -20,8 +20,8 @@ end fetch;
 architecture rtl of fetch is
 	type instruction_memory_t is array(0 to 15) of std_logic_vector(31 downto 0);
 	signal imem: instruction_memory_t := (
-		X"00110113", X"002080b3", X"00208133", X"002080b3", X"00208133", X"002080b3", X"00208133", X"002080b3",
-		X"00208133", X"002080b3", X"00208133", X"002080b3", X"00208133", X"0001007f", X"0000107f", X"00000000"
 	);
 	signal pc: unsigned(31 downto 0) := (others => '0');

 architecture rtl of fetch is
 	type instruction_memory_t is array(0 to 15) of std_logic_vector(31 downto 0);
 	signal imem: instruction_memory_t := (
+		X"00a00193", X"00100113", X"00208133", X"002080b3", X"ffe18193", X"fe304ae3", X"00018463", X"00010093",
+		X"0000006f", X"00000000", X"00000000", X"00000000", X"00000000", X"00000000", X"00000000", X"00000000"
 	);
 	signal pc: unsigned(31 downto 0) := (others => '0');

Now, we can test it and see that x1 holds 0x37, which is 55 in decimal, and indeed the tenth Fibonacci number equals 55.