This lesson starts at commit 0fcfa0cdaf00a57a0f2da39b78b9c2c6cdfab014.

5. Intermezzo: Running our design

Let's take a break from staring at simulation waveforms and try to run our design on the dev board.

It would be nice to somehow see the result of our computations, so let's add a custom instruction that displays the 8 least significant bits of a register on the LEDs of the Mimas A7.

Until now, we have only run our core in simulation. The "main" or "top level" module that will be used on the FPGA is top_level.vhd. It was already there in the template; we have just never used it. It already has outputs for the LEDs on the dev board. So, first, let's instantiate our core in the top level module.

src/top_level.vhd CHANGED Viewed

@@ -14,14 +14,14 @@ end top_level;
 architecture rtl of top_level is
 	signal count: unsigned(31 downto 0) := (others => '0');
 begin
-	led(7 downto 0) <= std_logic_vector(count(30 downto 23));
-	process (clk)
-	begin
-		if rising_edge(clk) then
-			count <= count + 1;
-		end if;
-	end process;
 end rtl;

 architecture rtl of top_level is
 	signal count: unsigned(31 downto 0) := (others => '0');
+	component core is
+		port (
+			clk: in std_logic
+		);
+	end component;
 begin
+	core_inst: core port map(clk => clk);
 end rtl;

To be able to set the LEDs we need to make an output for them in our core.

src/core.vhd CHANGED Viewed

@@ -8,7 +8,8 @@ use work.core_constants.all;
 entity core is
 	port (
-		clk: in std_logic
 	);
 end core;

 entity core is
 	port (
+		clk: in std_logic;
+		led: out std_logic_vector(7 downto 0)
 	);
 end core;

src/top_level.vhd CHANGED Viewed

@@ -16,12 +16,13 @@ architecture rtl of top_level is
 	component core is
 		port (
-			clk: in std_logic
 		);
 	end component;
 begin
-	core_inst: core port map(clk => clk);
 end rtl;

 	component core is
 		port (
+			clk: in std_logic;
+			led: out std_logic_vector(7 downto 0)
 		);
 	end component;
 begin
+	core_inst: core port map(clk => clk, led => led);
 end rtl;

We want to set it from the execute stage.

src/core.vhd CHANGED Viewed

@@ -43,7 +43,8 @@ architecture rtl of core is
 		port (
 			clk: in std_logic;
 			input: in decode_output_t;
-			output: out execute_output_t
 		);
 	end component;
@@ -60,7 +61,7 @@ begin
 	decode_write_inst: decode_write port map(clk => clk, decode_input => fetch_output, decode_output => decode_output, write_input => memory_output, pipeline_ready => pipeline_ready);
-	execute_inst: execute port map(clk => clk, input => decode_output, output => execute_output);
 	memory_inst: memory port map(clk => clk, input => execute_output, output => memory_output);

 		port (
 			clk: in std_logic;
 			input: in decode_output_t;
+			output: out execute_output_t;
+			led: out std_logic_vector(7 downto 0)
 		);
 	end component;
 	decode_write_inst: decode_write port map(clk => clk, decode_input => fetch_output, decode_output => decode_output, write_input => memory_output, pipeline_ready => pipeline_ready);
+	execute_inst: execute port map(clk => clk, input => decode_output, output => execute_output, led => led);
 	memory_inst: memory port map(clk => clk, input => execute_output, output => memory_output);

src/core/execute.vhd CHANGED Viewed

@@ -10,7 +10,8 @@ entity execute is
 	port (
 		clk: in std_logic;
 		input: in decode_output_t;
-		output: out execute_output_t := DEFAULT_EXECUTE_OUTPUT
 	);
 end execute;

 	port (
 		clk: in std_logic;
 		input: in decode_output_t;
+		output: out execute_output_t := DEFAULT_EXECUTE_OUTPUT;
+		led: out std_logic_vector(7 downto 0) := (others => '0')
 	);
 end execute;

Now, let's add an operation for setting the LED.

src/core/execute.vhd CHANGED Viewed

@@ -29,6 +29,8 @@ begin
 			if input.is_active = '1' and input.is_invalid = '0' then
 				if input.operation = OP_ADD then
 					v_output.result := std_logic_vector(unsigned(input.operand1) + unsigned(input.operand2));
 				else
 					assert false report "Unhandled operation value in execute stage" severity failure;
 				end if;

 			if input.is_active = '1' and input.is_invalid = '0' then
 				if input.operation = OP_ADD then
 					v_output.result := std_logic_vector(unsigned(input.operand1) + unsigned(input.operand2));
+				elsif input.operation = OP_LED then
+					led <= input.operand1(7 downto 0);
 				else
 					assert false report "Unhandled operation value in execute stage" severity failure;
 				end if;

src/core/types.vhd CHANGED Viewed

@@ -3,7 +3,7 @@ use ieee.std_logic_1164.all;
 package core_types is
-	type operation_t is (OP_ADD);
 	type fetch_output_t is record
 		is_active: std_logic;

 package core_types is
+	type operation_t is (OP_ADD, OP_LED);
 	type fetch_output_t is record
 		is_active: std_logic;

Now, we can add our custom instruction. I'll use the opcode field and check if it's all ones.

src/core/decode_write.vhd CHANGED Viewed

@@ -72,6 +72,12 @@ begin
 					v_decode_output.operand1 := reg(to_integer(unsigned(rs1)));
 					v_decode_output.operand2 := reg(to_integer(unsigned(rs2)));
 					v_decode_output.destination_reg := rd;
 				else
 					v_decode_output.is_invalid := '1';
 				end if;

 					v_decode_output.operand1 := reg(to_integer(unsigned(rs1)));
 					v_decode_output.operand2 := reg(to_integer(unsigned(rs2)));
 					v_decode_output.destination_reg := rd;
+				elsif opcode = "1111111" and funct3 = "000" then
+					-- LED rs1: set the LEDs to the 8 least significant bits of rs1
+					v_decode_output.operation := OP_LED;
+					v_decode_output.operand1 := reg(to_integer(unsigned(rs1)));
+					v_decode_output.operand2 := (others => '0');
+					v_decode_output.destination_reg := (others => '0');
 				else
 					v_decode_output.is_invalid := '1';
 				end if;

I also want to implement an instruction that makes our CPU hang. If we don't stop it, our CPU will keep executing the same 16 instructions over and over again, and it will be impossible to observe the LEDs when they keep changing thousands of times per second.

Currently, the fetch stage only fetches new instructions when an instruction is done, which is detected by observing the is_active flag. So, we can simply add an instruction that does not set is_active, and the fetch stage will stop, halting the entire pipeline.

src/core/decode_write.vhd CHANGED Viewed

@@ -78,6 +78,9 @@ begin
 					v_decode_output.operand1 := reg(to_integer(unsigned(rs1)));
 					v_decode_output.operand2 := (others => '0');
 					v_decode_output.destination_reg := (others => '0');
 				else
 					v_decode_output.is_invalid := '1';
 				end if;

 					v_decode_output.operand1 := reg(to_integer(unsigned(rs1)));
 					v_decode_output.operand2 := (others => '0');
 					v_decode_output.destination_reg := (others => '0');
+				elsif opcode = "1111111" and funct3 = "001" then
+					-- HANG
+					v_decode_output := DEFAULT_DECODE_OUTPUT;
 				else
 					v_decode_output.is_invalid := '1';
 				end if;

I've dubbed this custom instruction HANG.

Can we do anything interesting with 16 instructions? If we set x2 to 1 with ADDI x2, x2, 1, and then alternately do ADD x1, x1, x2 and ADD x2, x1, x2, we can calculate the Fibonacci numbers. The even-numbered Fibonacci numbers (F0, F2, F4, ...) end up in x1, while the odd-numbered Fibonacci numbers (F1, F3, F5, ...) end up in x2.

After doing n iterations of the ADD instruction, we have computed the (n+1)th Fibonacci number. So if we do a single ADDI instruction, 12 ADD instructions, the LED x2 instruction, and the HANG instruction, we compute the 13th Fibonacci number in 15 instructions.

Using the online assembler again, we find that ADDI x2, x2, 1 assembles to 00110113, ADD x1, x1, x2 assembles to 002080b3, and ADD x2, x1, x2 assembles to 00208133.

We have to assemble our custom instructions by hand. LED x2 assembles to 0001007f, and HANG assembles to 0000107f.

Now, we put this into our instruction memory.

src/core/fetch.vhd CHANGED Viewed

@@ -18,8 +18,8 @@ end fetch;
 architecture rtl of fetch is
 	type instruction_memory_t is array(0 to 15) of std_logic_vector(31 downto 0);
 	signal imem: instruction_memory_t := (
-		X"00108093", X"00108093", X"00000003", X"00000004", X"00000005", X"00000006", X"00000007", X"00000008",
-		X"00000009", X"0000000A", X"0000000B", X"0000000C", X"0000000D", X"0000000E", X"0000000F", X"00000010"
 	);
 	signal pc: unsigned(31 downto 0) := (others => '0');

 architecture rtl of fetch is
 	type instruction_memory_t is array(0 to 15) of std_logic_vector(31 downto 0);
 	signal imem: instruction_memory_t := (
+		X"00110113", X"002080b3", X"00208133", X"002080b3", X"00208133", X"002080b3", X"00208133", X"002080b3",
+		X"00208133", X"002080b3", X"00208133", X"002080b3", X"00208133", X"0001007f", X"0000107f", X"00000000"
 	);
 	signal pc: unsigned(31 downto 0) := (others => '0');

If we run this with

make bitstream
make program

and check the LEDs on our dev board, we can see they display the binary pattern 11101001. This is the binary representation of the number 233, which is indeed the 13th Fibonacci number. Success!