This lesson starts at commit 6304d615b3559db9ac9908467a134b971d5b82c0.

7. Control flow instructions

Let's continue with the control flow instructions. These are instructions that perform a "jump" or "call" and need to change the pc register in the fetch module. To do this, we need to add some signals in the output of the execute module and let the fetch module use them as inputs. We'll add an indicator signal jump that indicates if the pc should be overwritten, and a jump_address vector to pass the new address of the pc register.

src/core.vhd CHANGED
@@ -20,11 +20,15 @@ architecture rtl of core is
20
  signal execute_output: execute_output_t;
21
  signal memory_output: memory_output_t;
22
  signal pipeline_ready: std_logic;
 
 
23
 
24
  component fetch is
25
  port (
26
  clk: in std_logic;
27
  pipeline_ready: in std_logic;
 
 
28
  output: out fetch_output_t
29
  );
30
  end component;
@@ -44,6 +48,8 @@ architecture rtl of core is
44
  clk: in std_logic;
45
  input: in decode_output_t;
46
  output: out execute_output_t;
 
 
47
  led: out std_logic_vector(7 downto 0)
48
  );
49
  end component;
@@ -57,11 +63,11 @@ architecture rtl of core is
57
  end component;
58
 
59
  begin
60
- fetch_inst: fetch port map(clk => clk, output => fetch_output, pipeline_ready => pipeline_ready);
61
 
62
  decode_write_inst: decode_write port map(clk => clk, decode_input => fetch_output, decode_output => decode_output, write_input => memory_output, pipeline_ready => pipeline_ready);
63
 
64
- execute_inst: execute port map(clk => clk, input => decode_output, output => execute_output, led => led);
65
 
66
  memory_inst: memory port map(clk => clk, input => execute_output, output => memory_output);
67
 
 
20
  signal execute_output: execute_output_t;
21
  signal memory_output: memory_output_t;
22
  signal pipeline_ready: std_logic;
23
+ signal jump: std_logic;
24
+ signal jump_address: std_logic_vector(31 downto 0);
25
 
26
  component fetch is
27
  port (
28
  clk: in std_logic;
29
  pipeline_ready: in std_logic;
30
+ jump: in std_logic;
31
+ jump_address: in std_logic_vector(31 downto 0);
32
  output: out fetch_output_t
33
  );
34
  end component;
 
48
  clk: in std_logic;
49
  input: in decode_output_t;
50
  output: out execute_output_t;
51
+ jump: out std_logic := '0';
52
+ jump_address: out std_logic_vector(31 downto 0);
53
  led: out std_logic_vector(7 downto 0)
54
  );
55
  end component;
 
63
  end component;
64
 
65
  begin
66
+ fetch_inst: fetch port map(clk => clk, pipeline_ready => pipeline_ready, jump => jump, jump_address => jump_address, output => fetch_output);
67
 
68
  decode_write_inst: decode_write port map(clk => clk, decode_input => fetch_output, decode_output => decode_output, write_input => memory_output, pipeline_ready => pipeline_ready);
69
 
70
+ execute_inst: execute port map(clk => clk, input => decode_output, output => execute_output, jump => jump, jump_address => jump_address, led => led);
71
 
72
  memory_inst: memory port map(clk => clk, input => execute_output, output => memory_output);
73
 
src/core/execute.vhd CHANGED
@@ -11,6 +11,8 @@ entity execute is
11
  clk: in std_logic;
12
  input: in decode_output_t;
13
  output: out execute_output_t := DEFAULT_EXECUTE_OUTPUT;
 
 
14
  led: out std_logic_vector(7 downto 0) := (others => '0')
15
  );
16
  end execute;
 
11
  clk: in std_logic;
12
  input: in decode_output_t;
13
  output: out execute_output_t := DEFAULT_EXECUTE_OUTPUT;
14
+ jump: out std_logic := '0';
15
+ jump_address: out std_logic_vector(31 downto 0) := (others => '0');
16
  led: out std_logic_vector(7 downto 0) := (others => '0')
17
  );
18
  end execute;
src/core/fetch.vhd CHANGED
@@ -10,6 +10,8 @@ entity fetch is
10
  port (
11
  clk: in std_logic;
12
  pipeline_ready: in std_logic;
 
 
13
  output: out fetch_output_t := DEFAULT_FETCH_OUTPUT
14
  );
15
  end fetch;
 
10
  port (
11
  clk: in std_logic;
12
  pipeline_ready: in std_logic;
13
+ jump: in std_logic;
14
+ jump_address: in std_logic_vector(31 downto 0);
15
  output: out fetch_output_t := DEFAULT_FETCH_OUTPUT
16
  );
17
  end fetch;

Now, we want to use them in the fetch module.

src/core/fetch.vhd CHANGED
@@ -37,6 +37,8 @@ begin
37
  output.is_active <= '1';
38
  output.instr <= imem(to_integer(pc(5 downto 2)));
39
  output.pc <= std_logic_vector(pc);
 
 
40
  else
41
  output <= DEFAULT_FETCH_OUTPUT;
42
  end if;
 
37
  output.is_active <= '1';
38
  output.instr <= imem(to_integer(pc(5 downto 2)));
39
  output.pc <= std_logic_vector(pc);
40
+ elsif jump = '1' then
41
+ pc <= unsigned(jump_address);
42
  else
43
  output <= DEFAULT_FETCH_OUTPUT;
44
  end if;

We don't support fetching and jumping at the same time; I'll add an assertion to check this.

src/core/fetch.vhd CHANGED
@@ -37,6 +37,8 @@ begin
37
  output.is_active <= '1';
38
  output.instr <= imem(to_integer(pc(5 downto 2)));
39
  output.pc <= std_logic_vector(pc);
 
 
40
  elsif jump = '1' then
41
  pc <= unsigned(jump_address);
42
  else
 
37
  output.is_active <= '1';
38
  output.instr <= imem(to_integer(pc(5 downto 2)));
39
  output.pc <= std_logic_vector(pc);
40
+
41
+ assert jump = '0' report "Fetching and jumping at the same cycle is not supported";
42
  elsif jump = '1' then
43
  pc <= unsigned(jump_address);
44
  else

Now, we want to implement the JAL and JALR instructions. The docs say this about them:

The jump and link (JAL) instruction uses the J-type format, where the J-immediate encodes a signed offset in multiples of 2 bytes. The offset is sign-extended and added to the address of the jump instruction to form the jump target address. Jumps can therefore target a ±1 MiB range. JAL stores the address of the instruction following the jump ('pc'+4) into register rd.

The indirect jump instruction JALR (jump and link register) uses the I-type encoding. The target address is obtained by adding the sign-extended 12-bit I-immediate to the register rs1, then setting the least-significant bit of the result to zero. The address of the instruction following the jump (pc+4) is written to register rd.

Note that for the JALR instruction we'll actually need three operands:

  1. The immediate
  2. The value of the rs1 register
  3. The value that will be stored in the destination register

So, we'll add a third operand (operand3) to the output of the decode stage, which we'll use to pass the value that should be stored in the destination register (pc + 4 for both JAL and JALR).

src/core/constants.vhd CHANGED
@@ -17,6 +17,7 @@ package core_constants is
17
  operation => OP_ADD,
18
  operand1 => (others => '0'),
19
  operand2 => (others => '0'),
 
20
  destination_reg => (others => '0')
21
  );
22
 
 
17
  operation => OP_ADD,
18
  operand1 => (others => '0'),
19
  operand2 => (others => '0'),
20
+ operand3 => (others => '0'),
21
  destination_reg => (others => '0')
22
  );
23
 
src/core/types.vhd CHANGED
@@ -17,6 +17,7 @@ package core_types is
17
  operation: operation_t;
18
  operand1: std_logic_vector(31 downto 0);
19
  operand2: std_logic_vector(31 downto 0);
 
20
  destination_reg: std_logic_vector(4 downto 0);
21
  end record decode_output_t;
22
 
 
17
  operation: operation_t;
18
  operand1: std_logic_vector(31 downto 0);
19
  operand2: std_logic_vector(31 downto 0);
20
+ operand3: std_logic_vector(31 downto 0);
21
  destination_reg: std_logic_vector(4 downto 0);
22
  end record decode_output_t;
23
 

Now we're ready to decode the JAL and JALR instructions.

src/core/decode_write.vhd CHANGED
@@ -66,6 +66,7 @@ begin
66
  -- sign extension
67
  b_imm_s := std_logic_vector(resize(signed(b_imm), 32));
68
  i_imm_s := std_logic_vector(resize(signed(i_imm), 32));
 
69
 
70
  v_decode_output := DEFAULT_DECODE_OUTPUT;
71
 
@@ -86,9 +87,19 @@ begin
86
  v_decode_output.operand2 := u_imm;
87
  v_decode_output.destination_reg := rd;
88
  elsif opcode = "1101111" then
89
- -- TODO: JAL
 
 
 
 
 
90
  elsif opcode = "1100111" and funct3 = "000" then
91
- -- TODO: JALR
 
 
 
 
 
92
  elsif opcode = "1100011" then
93
  if funct3 = "000" then
94
  -- TODO: BEQ
 
66
  -- sign extension
67
  b_imm_s := std_logic_vector(resize(signed(b_imm), 32));
68
  i_imm_s := std_logic_vector(resize(signed(i_imm), 32));
69
+ j_imm_s := std_logic_vector(resize(signed(j_imm), 32));
70
 
71
  v_decode_output := DEFAULT_DECODE_OUTPUT;
72
 
 
87
  v_decode_output.operand2 := u_imm;
88
  v_decode_output.destination_reg := rd;
89
  elsif opcode = "1101111" then
90
+ -- JAL
91
+ v_decode_output.operation := OP_JAL;
92
+ v_decode_output.operand1 := decode_input.pc;
93
+ v_decode_output.operand2 := j_imm_s;
94
+ v_decode_output.operand3 := std_logic_vector(unsigned(decode_input.pc) + 4);
95
+ v_decode_output.destination_reg := rd;
96
  elsif opcode = "1100111" and funct3 = "000" then
97
+ -- JALR
98
+ v_decode_output.operation := OP_JAL;
99
+ v_decode_output.operand1 := reg(to_integer(unsigned(rs1)));
100
+ v_decode_output.operand2 := i_imm_s;
101
+ v_decode_output.operand3 := std_logic_vector(unsigned(decode_input.pc) + 4);
102
+ v_decode_output.destination_reg := rd;
103
  elsif opcode = "1100011" then
104
  if funct3 = "000" then
105
  -- TODO: BEQ

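We just need to implement OP_JAL in the execute stage. It should set the result (the value written to the destination register) to operand 3, which holds pc + 4. The jump target is the sum of operands 1 and 2 with its least-significant bit cleared, and the jump signal is raised so the fetch stage loads that address into pc.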

src/core/execute.vhd CHANGED
@@ -24,11 +24,15 @@ begin
24
  process (clk)
25
  variable v_output: execute_output_t;
26
  variable v_sign: std_logic_vector(31 downto 0);
 
 
27
 
28
  begin
29
  if rising_edge(clk) then
30
  v_output := DEFAULT_EXECUTE_OUTPUT;
31
  v_output.is_active := input.is_active;
 
 
32
 
33
  if input.is_active = '1' and input.is_invalid = '0' then
34
  if input.operation = OP_ADD then
@@ -95,6 +99,10 @@ begin
95
  if input.operand2(0) = '1' then
96
  v_output.result := v_sign(1 downto 0) & v_output.result(31 downto 2);
97
  end if;
 
 
 
 
98
  elsif input.operation = OP_LED then
99
  led <= input.operand1(7 downto 0);
100
  else
@@ -104,6 +112,9 @@ begin
104
  v_output.destination_reg := input.destination_reg;
105
  end if;
106
 
 
 
 
107
  output <= v_output;
108
  end if;
109
  end process;
 
24
  process (clk)
25
  variable v_output: execute_output_t;
26
  variable v_sign: std_logic_vector(31 downto 0);
27
+ variable v_jump: std_logic;
28
+ variable v_jump_address: std_logic_vector(31 downto 0);
29
 
30
  begin
31
  if rising_edge(clk) then
32
  v_output := DEFAULT_EXECUTE_OUTPUT;
33
  v_output.is_active := input.is_active;
34
+ v_jump := '0';
35
+ v_jump_address := (others => '0');
36
 
37
  if input.is_active = '1' and input.is_invalid = '0' then
38
  if input.operation = OP_ADD then
 
99
  if input.operand2(0) = '1' then
100
  v_output.result := v_sign(1 downto 0) & v_output.result(31 downto 2);
101
  end if;
102
+ elsif input.operation = OP_JAL then
103
+ v_jump := '1';
104
+ v_jump_address := std_logic_vector(unsigned(input.operand1) + unsigned(input.operand2));
105
+ v_output.result := input.operand3;
106
  elsif input.operation = OP_LED then
107
  led <= input.operand1(7 downto 0);
108
  else
 
112
  v_output.destination_reg := input.destination_reg;
113
  end if;
114
 
115
+ jump <= v_jump;
116
+ jump_address <= v_jump_address(31 downto 1) & "0";
117
+
118
  output <= v_output;
119
  end if;
120
  end process;
src/core/types.vhd CHANGED
@@ -3,7 +3,7 @@ use ieee.std_logic_1164.all;
3
 
4
 
5
  package core_types is
6
- type operation_t is (OP_ADD, OP_SLT, OP_SLTU, OP_XOR, OP_OR, OP_AND, OP_SLL, OP_SRL, OP_SRA, OP_SUB, OP_LED);
7
 
8
  type fetch_output_t is record
9
  is_active: std_logic;
 
3
 
4
 
5
  package core_types is
6
+ type operation_t is (OP_ADD, OP_SLT, OP_SLTU, OP_XOR, OP_OR, OP_AND, OP_SLL, OP_SRL, OP_SRA, OP_SUB, OP_JAL, OP_LED);
7
 
8
  type fetch_output_t is record
9
  is_active: std_logic;

Now, let's decode BEQ, BNE, BLT, BGE, BLTU, BGEU.

src/core/decode_write.vhd CHANGED
@@ -101,18 +101,28 @@ begin
101
  v_decode_output.operand3 := std_logic_vector(unsigned(decode_input.pc) + 4);
102
  v_decode_output.destination_reg := rd;
103
  elsif opcode = "1100011" then
 
 
 
 
104
  if funct3 = "000" then
105
- -- TODO: BEQ
 
106
  elsif funct3 = "001" then
107
- -- TODO: BNE
 
108
  elsif funct3 = "100" then
109
- -- TODO: BLT
 
110
  elsif funct3 = "101" then
111
- -- TODO: BGE
 
112
  elsif funct3 = "110" then
113
- -- TODO: BLTU
 
114
  elsif funct3 = "111" then
115
- -- TODO: BGEU
 
116
  else
117
  v_decode_output.is_invalid := '1';
118
  end if;
 
101
  v_decode_output.operand3 := std_logic_vector(unsigned(decode_input.pc) + 4);
102
  v_decode_output.destination_reg := rd;
103
  elsif opcode = "1100011" then
104
+ v_decode_output.operand1 := reg(to_integer(unsigned(rs1)));
105
+ v_decode_output.operand2 := reg(to_integer(unsigned(rs2)));
106
+ v_decode_output.operand3 := std_logic_vector(unsigned(decode_input.pc) + unsigned(b_imm_s));
107
+
108
  if funct3 = "000" then
109
+ -- BEQ
110
+ v_decode_output.operation := OP_BEQ;
111
  elsif funct3 = "001" then
112
+ -- BNE
113
+ v_decode_output.operation := OP_BNE;
114
  elsif funct3 = "100" then
115
+ -- BLT
116
+ v_decode_output.operation := OP_BLT;
117
  elsif funct3 = "101" then
118
+ -- BGE
119
+ v_decode_output.operation := OP_BGE;
120
  elsif funct3 = "110" then
121
+ -- BLTU
122
+ v_decode_output.operation := OP_BLTU;
123
  elsif funct3 = "111" then
124
+ -- BGEU
125
+ v_decode_output.operation := OP_BGEU;
126
  else
127
  v_decode_output.is_invalid := '1';
128
  end if;
src/core/types.vhd CHANGED
@@ -3,7 +3,7 @@ use ieee.std_logic_1164.all;
3
 
4
 
5
  package core_types is
6
- type operation_t is (OP_ADD, OP_SLT, OP_SLTU, OP_XOR, OP_OR, OP_AND, OP_SLL, OP_SRL, OP_SRA, OP_SUB, OP_JAL, OP_LED);
7
 
8
  type fetch_output_t is record
9
  is_active: std_logic;
 
3
 
4
 
5
  package core_types is
6
+ type operation_t is (OP_ADD, OP_SLT, OP_SLTU, OP_XOR, OP_OR, OP_AND, OP_SLL, OP_SRL, OP_SRA, OP_SUB, OP_JAL, OP_BEQ, OP_BNE, OP_BLT, OP_BGE, OP_BLTU, OP_BGEU, OP_LED);
7
 
8
  type fetch_output_t is record
9
  is_active: std_logic;

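Next, we implement OP_BEQ, OP_BNE, OP_BLT, OP_BGE, OP_BLTU, OP_BGEU in the execute stage. Each one compares operand 1 with operand 2 (as signed values for BLT/BGE, as unsigned values for BLTU/BGEU) and, if the condition holds, jumps to the branch target that the decode stage already computed in operand 3.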

src/core/execute.vhd CHANGED
@@ -103,6 +103,36 @@ begin
103
  v_jump := '1';
104
  v_jump_address := std_logic_vector(unsigned(input.operand1) + unsigned(input.operand2));
105
  v_output.result := input.operand3;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
  elsif input.operation = OP_LED then
107
  led <= input.operand1(7 downto 0);
108
  else
 
103
  v_jump := '1';
104
  v_jump_address := std_logic_vector(unsigned(input.operand1) + unsigned(input.operand2));
105
  v_output.result := input.operand3;
106
+ elsif input.operation = OP_BEQ then
107
+ if input.operand1 = input.operand2 then
108
+ v_jump := '1';
109
+ v_jump_address := input.operand3;
110
+ end if;
111
+ elsif input.operation = OP_BNE then
112
+ if input.operand1 /= input.operand2 then
113
+ v_jump := '1';
114
+ v_jump_address := input.operand3;
115
+ end if;
116
+ elsif input.operation = OP_BLT then
117
+ if signed(input.operand1) < signed(input.operand2) then
118
+ v_jump := '1';
119
+ v_jump_address := input.operand3;
120
+ end if;
121
+ elsif input.operation = OP_BGE then
122
+ if signed(input.operand1) >= signed(input.operand2) then
123
+ v_jump := '1';
124
+ v_jump_address := input.operand3;
125
+ end if;
126
+ elsif input.operation = OP_BLTU then
127
+ if unsigned(input.operand1) < unsigned(input.operand2) then
128
+ v_jump := '1';
129
+ v_jump_address := input.operand3;
130
+ end if;
131
+ elsif input.operation = OP_BGEU then
132
+ if unsigned(input.operand1) >= unsigned(input.operand2) then
133
+ v_jump := '1';
134
+ v_jump_address := input.operand3;
135
+ end if;
136
  elsif input.operation = OP_LED then
137
  led <= input.operand1(7 downto 0);
138
  else

Now that we have a jump instruction, we don't need our custom HANG instruction anymore. Instead, we can just do

hang:
j hang

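Here, j is a "pseudoinstruction": syntactic sugar for a JAL instruction whose destination register is x0, so the return address is discarded. Because the label is the jump instruction itself, the assembler encodes an offset of 0. So, the instruction jumps to itself, effectively hanging the CPU.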

src/core/decode_write.vhd CHANGED
@@ -248,9 +248,6 @@ begin
248
  v_decode_output.operand1 := reg(to_integer(unsigned(rs1)));
249
  v_decode_output.operand2 := (others => '0');
250
  v_decode_output.destination_reg := (others => '0');
251
- elsif opcode = "1111111" and funct3 = "001" then
252
- -- HANG (custom instruction): stops execution of the CPU
253
- v_decode_output := DEFAULT_DECODE_OUTPUT;
254
  else
255
  v_decode_output.is_invalid := '1';
256
  end if;
 
248
  v_decode_output.operand1 := reg(to_integer(unsigned(rs1)));
249
  v_decode_output.operand2 := (others => '0');
250
  v_decode_output.destination_reg := (others => '0');
 
 
 
251
  else
252
  v_decode_output.is_invalid := '1';
253
  end if;

That's all the work on the CPU for this lesson.

As a sanity check, I wrote this cute little program in RISC-V assembly to calculate Fibonacci numbers again.

# Fibonacci sanity check: computes the x3-th Fibonacci number
# and leaves it in x1, then hangs.
#
# x3 stores the number of
# iterations we still have to do
li x3, 10

# init x1, x2 to F(0) = 0, F(-1) = 1
# NOTE(review): x1 is never written before the loop, so this
# presumably relies on x1 reading 0 after reset -- confirm
li x2, 1

loop:

# do two iterations: x2 advances by two Fibonacci
# numbers, then x1 advances by two as well
add x2, x1, x2
add x1, x1, x2

# decrease x3 by 2, matching the two iterations above
addi x3, x3, -2

# loop if we're not done yet (x3 > 0)
bgt x3, x0, loop

# if the number of iterations is zero, x1 already
# holds the answer, so skip the fix-up below
beq x3, x0, end

# otherwise, x3 equals -1 (the initial count was odd) and x1 has
# the next Fibonacci number, so we get
# the previous one which is stored in x2
mv x1, x2

end:
# jump-to-self: hang here with the result in x1
j end

We can assemble this with the online RISC-V assembler and put it into our instruction memory.

src/core/fetch.vhd CHANGED
@@ -20,8 +20,8 @@ end fetch;
20
  architecture rtl of fetch is
21
  type instruction_memory_t is array(0 to 15) of std_logic_vector(31 downto 0);
22
  signal imem: instruction_memory_t := (
23
- X"00110113", X"002080b3", X"00208133", X"002080b3", X"00208133", X"002080b3", X"00208133", X"002080b3",
24
- X"00208133", X"002080b3", X"00208133", X"002080b3", X"00208133", X"0001007f", X"0000107f", X"00000000"
25
  );
26
 
27
  signal pc: unsigned(31 downto 0) := (others => '0');
 
20
  architecture rtl of fetch is
21
  type instruction_memory_t is array(0 to 15) of std_logic_vector(31 downto 0);
22
  signal imem: instruction_memory_t := (
23
+ X"00a00193", X"00100113", X"00208133", X"002080b3", X"ffe18193", X"fe304ae3", X"00018463", X"00010093",
24
+ X"0000006f", X"00000000", X"00000000", X"00000000", X"00000000", X"00000000", X"00000000", X"00000000"
25
  );
26
 
27
  signal pc: unsigned(31 downto 0) := (others => '0');

Now, we can test it and see that x1 holds 0x37, which is 55 in decimal, and indeed the tenth Fibonacci number equals 55.