This lesson starts at commit 0fcfa0cdaf00a57a0f2da39b78b9c2c6cdfab014.

5. Intermezzo: Running our design

Let's take a break from staring at simulation waveforms and try to run our design on the dev board.

It would be nice to somehow see the result of our computations, so let's add a custom instruction that displays the 8 least significant bits of a register on the LEDs of the Mimas A7.

Until now, we have only run our core in simulation. The "main" or "top level" module that will be used on the FPGA is top_level.vhd. It was already there in the template; we have just never used it. It already has outputs for the LEDs on the dev board. So, first, let's instantiate our core in the top level module.

src/top_level.vhd CHANGED
@@ -14,14 +14,14 @@ end top_level;
14
  architecture rtl of top_level is
15
  signal count: unsigned(31 downto 0) := (others => '0');
16
 
 
 
 
 
 
 
17
  begin
18
- led(7 downto 0) <= std_logic_vector(count(30 downto 23));
19
-
20
- process (clk)
21
- begin
22
- if rising_edge(clk) then
23
- count <= count + 1;
24
- end if;
25
- end process;
26
 
27
  end rtl;
 
14
  architecture rtl of top_level is
15
  signal count: unsigned(31 downto 0) := (others => '0');
16
 
17
+ component core is
18
+ port (
19
+ clk: in std_logic
20
+ );
21
+ end component;
22
+
23
  begin
24
+
25
+ core_inst: core port map(clk => clk);
 
 
 
 
 
 
26
 
27
  end rtl;

To be able to set the LEDs we need to make an output for them in our core.

src/core.vhd CHANGED
@@ -8,7 +8,8 @@ use work.core_constants.all;
8
 
9
  entity core is
10
  port (
11
- clk: in std_logic
 
12
  );
13
  end core;
14
 
 
8
 
9
  entity core is
10
  port (
11
+ clk: in std_logic;
12
+ led: out std_logic_vector(7 downto 0)
13
  );
14
  end core;
15
 
src/top_level.vhd CHANGED
@@ -16,12 +16,13 @@ architecture rtl of top_level is
16
 
17
  component core is
18
  port (
19
- clk: in std_logic
 
20
  );
21
  end component;
22
 
23
  begin
24
 
25
- core_inst: core port map(clk => clk);
26
 
27
  end rtl;
 
16
 
17
  component core is
18
  port (
19
+ clk: in std_logic;
20
+ led: out std_logic_vector(7 downto 0)
21
  );
22
  end component;
23
 
24
  begin
25
 
26
+ core_inst: core port map(clk => clk, led => led);
27
 
28
  end rtl;

We want to set it from the execute stage.

src/core.vhd CHANGED
@@ -43,7 +43,8 @@ architecture rtl of core is
43
  port (
44
  clk: in std_logic;
45
  input: in decode_output_t;
46
- output: out execute_output_t
 
47
  );
48
  end component;
49
 
@@ -60,7 +61,7 @@ begin
60
 
61
  decode_write_inst: decode_write port map(clk => clk, decode_input => fetch_output, decode_output => decode_output, write_input => memory_output, pipeline_ready => pipeline_ready);
62
 
63
- execute_inst: execute port map(clk => clk, input => decode_output, output => execute_output);
64
 
65
  memory_inst: memory port map(clk => clk, input => execute_output, output => memory_output);
66
 
 
43
  port (
44
  clk: in std_logic;
45
  input: in decode_output_t;
46
+ output: out execute_output_t;
47
+ led: out std_logic_vector(7 downto 0)
48
  );
49
  end component;
50
 
 
61
 
62
  decode_write_inst: decode_write port map(clk => clk, decode_input => fetch_output, decode_output => decode_output, write_input => memory_output, pipeline_ready => pipeline_ready);
63
 
64
+ execute_inst: execute port map(clk => clk, input => decode_output, output => execute_output, led => led);
65
 
66
  memory_inst: memory port map(clk => clk, input => execute_output, output => memory_output);
67
 
src/core/execute.vhd CHANGED
@@ -10,7 +10,8 @@ entity execute is
10
  port (
11
  clk: in std_logic;
12
  input: in decode_output_t;
13
- output: out execute_output_t := DEFAULT_EXECUTE_OUTPUT
 
14
  );
15
  end execute;
16
 
 
10
  port (
11
  clk: in std_logic;
12
  input: in decode_output_t;
13
+ output: out execute_output_t := DEFAULT_EXECUTE_OUTPUT;
14
+ led: out std_logic_vector(7 downto 0) := (others => '0')
15
  );
16
  end execute;
17
 

Now, let's add an operation for setting the LED.

src/core/execute.vhd CHANGED
@@ -29,6 +29,8 @@ begin
29
  if input.is_active = '1' and input.is_invalid = '0' then
30
  if input.operation = OP_ADD then
31
  v_output.result := std_logic_vector(unsigned(input.operand1) + unsigned(input.operand2));
 
 
32
  else
33
  assert false report "Unhandled operation value in execute stage" severity failure;
34
  end if;
 
29
  if input.is_active = '1' and input.is_invalid = '0' then
30
  if input.operation = OP_ADD then
31
  v_output.result := std_logic_vector(unsigned(input.operand1) + unsigned(input.operand2));
32
+ elsif input.operation = OP_LED then
33
+ led <= input.operand1(7 downto 0);
34
  else
35
  assert false report "Unhandled operation value in execute stage" severity failure;
36
  end if;
src/core/types.vhd CHANGED
@@ -3,7 +3,7 @@ use ieee.std_logic_1164.all;
3
 
4
 
5
  package core_types is
6
- type operation_t is (OP_ADD);
7
 
8
  type fetch_output_t is record
9
  is_active: std_logic;
 
3
 
4
 
5
  package core_types is
6
+ type operation_t is (OP_ADD, OP_LED);
7
 
8
  type fetch_output_t is record
9
  is_active: std_logic;

Now, we can add our custom instruction. I'll use the opcode field and check if it's all ones.

src/core/decode_write.vhd CHANGED
@@ -72,6 +72,12 @@ begin
72
  v_decode_output.operand1 := reg(to_integer(unsigned(rs1)));
73
  v_decode_output.operand2 := reg(to_integer(unsigned(rs2)));
74
  v_decode_output.destination_reg := rd;
 
 
 
 
 
 
75
  else
76
  v_decode_output.is_invalid := '1';
77
  end if;
 
72
  v_decode_output.operand1 := reg(to_integer(unsigned(rs1)));
73
  v_decode_output.operand2 := reg(to_integer(unsigned(rs2)));
74
  v_decode_output.destination_reg := rd;
75
+ elsif opcode = "1111111" and funct3 = "000" then
76
+ -- LED rs1: set the LEDs to the 8 least significant bits of rs1
77
+ v_decode_output.operation := OP_LED;
78
+ v_decode_output.operand1 := reg(to_integer(unsigned(rs1)));
79
+ v_decode_output.operand2 := (others => '0');
80
+ v_decode_output.destination_reg := (others => '0');
81
  else
82
  v_decode_output.is_invalid := '1';
83
  end if;

I also want to implement an instruction that makes our CPU hang. If we don't stop it, our CPU will keep executing the same 16 instructions over and over again, and it will be impossible to observe the LEDs when they keep changing thousands of times per second.

Currently, the fetch stage only fetches new instructions when an instruction is done, which is detected by observing the is_active flag. So, we can simply add an instruction that does not set is_active, and the fetch stage will stop, halting the entire pipeline.

src/core/decode_write.vhd CHANGED
@@ -78,6 +78,9 @@ begin
78
  v_decode_output.operand1 := reg(to_integer(unsigned(rs1)));
79
  v_decode_output.operand2 := (others => '0');
80
  v_decode_output.destination_reg := (others => '0');
 
 
 
81
  else
82
  v_decode_output.is_invalid := '1';
83
  end if;
 
78
  v_decode_output.operand1 := reg(to_integer(unsigned(rs1)));
79
  v_decode_output.operand2 := (others => '0');
80
  v_decode_output.destination_reg := (others => '0');
81
+ elsif opcode = "1111111" and funct3 = "001" then
82
+ -- HANG
83
+ v_decode_output := DEFAULT_DECODE_OUTPUT;
84
  else
85
  v_decode_output.is_invalid := '1';
86
  end if;

I've dubbed this custom instruction HANG.

Can we do anything interesting with 16 instructions? If we set x2 to 1 with ADDI x2, x2, 1, and then alternately do ADD x1, x1, x2 and ADD x2, x1, x2, we can calculate the Fibonacci numbers. The even-numbered Fibonacci numbers (F0, F2, F4, ...) end up in x1, while the odd-numbered Fibonacci numbers (F1, F3, F5, ...) end up in x2.

After doing n iterations of the ADD instruction, we have computed the (n+1)th Fibonacci number. So if we do a single ADDI instruction, 12 ADD instructions, the LED x2 instruction, and the HANG instruction, we compute the 13th Fibonacci number in 15 instructions (1 + 12 + 1 + 1 = 15). Since the 12th and final ADD is ADD x2, x1, x2, the result ends up in x2, which is why we display x2.

Using the online assembler again, we find that ADDI x2, x2, 1 assembles to 00110113, ADD x1, x1, x2 assembles to 002080b3, and ADD x2, x1, x2 assembles to 00208133.

We have to assemble our custom instructions by hand. LED x2 assembles to 0001007f, and HANG assembles to 0000107f.

Now, we put this into our instruction memory.

src/core/fetch.vhd CHANGED
@@ -18,8 +18,8 @@ end fetch;
18
  architecture rtl of fetch is
19
  type instruction_memory_t is array(0 to 15) of std_logic_vector(31 downto 0);
20
  signal imem: instruction_memory_t := (
21
- X"00108093", X"00108093", X"00000003", X"00000004", X"00000005", X"00000006", X"00000007", X"00000008",
22
- X"00000009", X"0000000A", X"0000000B", X"0000000C", X"0000000D", X"0000000E", X"0000000F", X"00000010"
23
  );
24
 
25
  signal pc: unsigned(31 downto 0) := (others => '0');
 
18
  architecture rtl of fetch is
19
  type instruction_memory_t is array(0 to 15) of std_logic_vector(31 downto 0);
20
  signal imem: instruction_memory_t := (
21
+ X"00110113", X"002080b3", X"00208133", X"002080b3", X"00208133", X"002080b3", X"00208133", X"002080b3",
22
+ X"00208133", X"002080b3", X"00208133", X"002080b3", X"00208133", X"0001007f", X"0000107f", X"00000000"
23
  );
24
 
25
  signal pc: unsigned(31 downto 0) := (others => '0');

If we run this with

make bitstream
make program

and check the LEDs on our dev board, we can see they display the binary pattern 11101001. This is the binary representation of the number 233, which is indeed the 13th Fibonacci number. Success!