-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathgs4502b_instruction_prefetch.vhdl
675 lines (597 loc) · 30.9 KB
/
gs4502b_instruction_prefetch.vhdl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
-- 1. Every memory write could be an instance of self-modifying code (SMC). There
-- just isn't any way to know. Worse, many cases of SMC modify the very next
-- instruction to be executed, so we have to be able to flush the prefetch
-- pipeline when required. This is really just a special case of:
--
-- 2. The CPU could indicate an unexpected branch (or interrupt) at any time, in
-- which case we need to finish what we are doing, and quickly start fetching
-- instructions from the new location. It would be great for branches, for
-- example, to be able to have speculatively loaded the right bytes to avoid
-- latency (since we have at least 25% more memory bandwidth than required)
-- when taking unexpected branches.
--
-- 3. For JMP, JSR, BSR, BRA we can immediately redirect to the new instruction
-- address. Also for branches that we expect will be taken. This will reduce,
-- but not eliminate, the latency of executing the instructions that follow.
-- We could have a short instruction queue to help hide this.
--
-- 4. For RTS and RTI, we could also have access to the return address (which
-- we will know, since we computed the new address), and which we could use
-- immediately, provided that we know that no stack fiddling or other changes
-- have occurred that would redirect it.
use WORK.ALL;
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
use ieee.numeric_std.all;
use Std.TextIO.all;
use work.debugtools.all;
use work.addressing_modes.all;
use work.instruction_equations.all;
use work.extra_instruction_equations.all;
use work.instruction_lengths.all;
use work.types.all;
use work.instruction_types.all;
use work.visualise.all;
-- Instruction prefetch stage for one GS4502B core.
--
-- Requests 4-byte words from memory through fetch_port_write, collects the
-- returned bytes in a byte buffer, and presents the instruction at the head
-- of that buffer (opcode, up to two argument bytes, decoded flags and
-- candidate branch targets) on instruction_out.  The same memory port is
-- shared with indirect-vector fetches requested via vector_fetch_*_in.
entity gs4502b_instruction_prefetch is
generic (
-- Label passed as the first argument of every visualise() call made by the
-- architecture.
entity_name : in string
);
port (
-- All state in this unit is updated on the rising edge of cpuclock.
cpuclock : in std_logic;
-- While '1', neither the buffer-update/fetch path nor the redirect path
-- runs, and fetch_port_write.valid is driven false.
reset : in std_logic;
-- Core number.  coreid+1 is placed in bits 7..6 of user_flags on every
-- memory request and is used to recognise this core's own responses.
coreid : in integer;
-- When true and coreid = 0, the fetch port is treated as ready on every
-- cycle, regardless of fetch_port_read.ready.
primary_core_boost : in boolean;
-- Selects the opcode table used for length/flag/addressing-mode lookups:
-- CPU6502 prepends a '1' bit to the opcode, anything else prepends '0'.
current_cpu_personality : in cpu_personality;
-- Input: 1-bit flag + destination address from execute stage to instruct us to
-- divert (whether due to branch mis-predict, RTS/RTI, interrupt or trap
-- entry/return).
-- redirected_address becomes the new fetch/instruction address;
-- redirected_pch supplies bits 15..8 of the new program counter, whose low
-- byte is taken from redirected_address(7 downto 0).
address_redirecting : in boolean;
redirected_address : in translated_address;
redirected_pch : in unsigned(15 downto 8);
-- We also need to know the value of the B register, so that we can set the
-- upper byte of the argument. This allows us to treat ZP and ABS modes
-- identically, and simplify some of the address calculation logic later.
-- This also means that setting B must flush the pipeline by asserting
-- address_redirecting.
regs : in cpu_registers;
-- While true, no instruction is consumed from the buffer and
-- instruction_out, instruction_out_valid and the branch*_pc outputs are
-- not updated.
stall : in boolean;
-- We also need to know when we are being asked to provide an indirect
-- vector for one of the indirect addressing modes.
-- A request is considered new when its transaction ID differs from the ID
-- of the last vector fetch that was issued to memory.
vector_fetch_address_in : in translated_address;
vector_fetch_transaction_id_in : in unsigned(4 downto 0);
-- Driven false when a vector request could not be serviced in the current
-- cycle; the architecture then parks one request in a single-entry stall
-- buffer.
prefetch_ready_to_accept_vector_request : out boolean := true;
-- Result of a vector fetch: the transaction ID taken from the returned
-- user_flags(4 downto 0), and the four returned bytes.
vector_fetch_out_transaction_id : out unsigned(4 downto 0);
vector_fetch_out_bytes : out bytes4;
-- Instruction at the head of the byte buffer.  instruction_out_valid is
-- true only when at least 3 bytes were buffered at the time of dispatch.
instruction_out : out instruction_information;
instruction_out_valid : out boolean;
-- Candidate branch targets computed from the buffer head (all modulo 2^16):
--   branch8_pc    = PC + 2 + sign-extended second instruction byte
--   branch16_pc   = PC + 3 + 16-bit value of instruction bytes 2 and 3
--   branch8_zp_pc = PC + 3 + sign-extended third instruction byte (BBR/BBS)
branch8_pc : out unsigned(15 downto 0);
branch16_pc : out unsigned(15 downto 0);
branch8_zp_pc : out unsigned(15 downto 0);
-- Interface to memory
-- fetch_port_write carries the request (valid, translated address,
-- user_flags tag); fetch_port_read carries the ready flag and the response
-- (translated address, user_flags tag, four bytes).
fetch_port_write : out fetch_port_in;
fetch_port_read : in fetch_port_out
);
end gs4502b_instruction_prefetch;
-- Behavioural implementation of the instruction prefetcher.
--
-- Everything happens in a single process clocked on cpuclock.  In each clock
-- the process, in order:
--   1. traces all inputs and state via visualise();
--   2. advances a two-stage pipeline of memory responses
--      (fetch_port_read -> fetch_buffer_1 -> fetch_buffer_now), tagging each
--      byte with an instruction length on the way;
--   3. works out how many bytes the instruction at the buffer head consumes
--      and shifts the byte/length buffers down by that amount;
--   4. appends a valid response word to the buffer if there is room;
--   5. issues the next memory request (indirect vector or instruction word);
--   6. handles address redirection, which overrides the results of 3-5;
--   7. decodes the buffer head into instruction_out and the branch targets.
-- Several signals are assigned in more than one of these steps; the
-- assignment that executes last in the process is the one that takes effect,
-- so statement order is significant.
architecture behavioural of gs4502b_instruction_prefetch is
-- Translated address and 16-bit program counter of the instruction currently
-- at the head of the byte buffer.
signal instruction_address : translated_address := (others => '0');
signal instruction_pc : unsigned(15 downto 0) := x"8100";
-- Instruction byte buffer, with a parallel buffer holding the instruction
-- length that each byte would have if it were an opcode.  Byte 0 occupies
-- bits 7..0.
signal ilen_buffer : ilens;
signal byte_buffer : unsigned((8*BYTE_BUFFER_WIDTH)-1 downto 0);
-- Number of valid bytes currently held in byte_buffer.
signal bytes_ready : integer range 0 to 16 := 0;
-- Advanced by consumed_bytes each cycle; in this file it is otherwise only
-- passed to visualise().
signal buffer_address : translated_address := (others => '0');
-- Address of the most recently requested instruction word; the next request
-- is for fetch_address + 4.
signal fetch_address : translated_address := (others => '0');
-- Number of instruction-word requests we are still allowed to issue.
signal burst_fetch : integer range 0 to (BYTE_BUFFER_WIDTH/4+1) := 0;
-- Running count of bytes dispatched, used to decide when another word
-- should be requested (every 4 bytes).
signal dispatched_bytes : integer range 0 to 7 := 0;
-- And which address are we currently looking for to append to the end of our
-- byte buffer?
signal desired_address : translated_address := (others => '0');
-- Transaction number given to the next instruction fetch request, and the
-- transaction number of the response we are waiting to append to the buffer.
signal ifetch_transaction_counter : unsigned(4 downto 0) := (others => '0');
signal ifetch_expected_transaction_counter : unsigned(4 downto 0) := (others => '0');
-- Transaction ID of the last vector fetch that was issued to memory.
signal last_vector_fetch_transaction_id : unsigned(4 downto 0) := (others => '1');
-- Single-entry buffer holding a vector request that could not be issued
-- while the fetch port was not ready.
signal vector_fetch_stall_buffer_occupied : boolean := false;
signal vector_fetch_stall_buffer_transaction_id : unsigned(4 downto 0);
signal vector_fetch_stall_buffer_address : translated_address;
-- Delayed signals to tell us which address and values of chip/fast RAM we are
-- reading in a given cycle
signal fetch_buffer_1 : prefetch_buffer;
signal fetch_buffer_now : prefetch_buffer;
-- True when fetch_buffer_now holds the instruction word we are waiting for.
signal fetch_buffer_now_valid : boolean := false;
-- Extra high bit prepended to opcodes for the instruction length lookup;
-- follows current_cpu_personality with one cycle of delay.
signal opcode_high_bit : std_logic := '1';
-- Registered copy of fetch_port_used; in this file it is only passed to
-- visualise().
signal fetched_last_cycle : boolean := false;
-- Set when the buffer head decodes as an unconditional branch; blocks further
-- burst fetching until the next redirect.
signal end_of_trace : boolean := false;
-- Registered flag: true when the buffer can accept another 4-byte word.
signal space_for_bytes : boolean := true;
begin
process (cpuclock) is
-- XXX We should just be able to use the definition from visualise.vhdl,
-- but GHDL doesn't recognise that as the necessary type.
-- function visualise(e : string; s : string; v : prefetch_buffer) return boolean is
-- begin
-- report "VISUALISE:" & e & ":" & s & ":prefetch_buffer:"
-- & "[" & to_hstring(v.v(0).byte) & "/" & integer'image(v.v(0).ilen) & ","
-- & to_hstring(v.v(1).byte) & "/" & integer'image(v.v(1).ilen) & ","
-- & to_hstring(v.v(2).byte) & "/" & integer'image(v.v(2).ilen) & ","
-- & to_hstring(v.v(3).byte) & "/" & integer'image(v.v(3).ilen)
-- & "] @ "
-- & to_hstring(v.address)
-- & ", flags=" & to_string(v.user_flags);
-- return true;
-- end function;
-- Instruction record assembled from the buffer head and copied to
-- instruction_out.
variable instruction : instruction_information;
-- NOTE(review): 'bytes' does not appear to be referenced anywhere in this
-- process.
variable bytes : instruction_bytes;
variable next_pc : unsigned(15 downto 0);
-- Byte index in the buffer at which a newly arrived word is stored.
variable store_offset : integer range 0 to 15 := 0;
-- Number of bytes removed from the buffer head this cycle (0 if nothing is
-- dispatched).
variable consumed_bytes : integer range 0 to 3 := 0;
-- Next-cycle values of bytes_ready / byte_buffer / ilen_buffer.
variable new_bytes_ready : integer range 0 to BYTE_BUFFER_WIDTH := 0;
variable new_byte_buffer : unsigned((8*BYTE_BUFFER_WIDTH)-1 downto 0);
variable new_ilen_buffer : ilens;
-- burst_add_one: another instruction word should be requested.
-- burst_sub_one is only ever set to false and reported in this process.
variable burst_add_one : boolean := false;
variable burst_sub_one : boolean := false;
-- True if a new request was pushed to the fetch port this cycle.
variable fetch_port_used : boolean := false;
-- Vector request selected for issue (live inputs or the stall buffer).
-- Being process variables, these keep their value between clock edges.
variable vector_fetch_address : translated_address;
variable vector_fetch_transaction_id : unsigned(4 downto 0);
-- Sink for the return value of visualise().
variable ignored : boolean;
begin
if rising_edge(cpuclock) then
-- Trace every input and every state signal; the return values are
-- discarded.
ignored := visualise(entity_name,"current_cpu_personality",current_cpu_personality);
ignored := visualise(entity_name,"address_redirecting",address_redirecting);
ignored := visualise(entity_name,"redirected_address",redirected_address);
ignored := visualise(entity_name,"redirected_pch",redirected_pch);
ignored := visualise(entity_name,"regs",regs);
ignored := visualise(entity_name,"stall",stall);
ignored := visualise(entity_name,"vector_fetch_address_in",vector_fetch_address_in);
ignored := visualise(entity_name,"vector_fetch_transaction_id_in",vector_fetch_transaction_id_in);
ignored := visualise(entity_name,"fetch_port_read",fetch_port_read);
ignored := visualise(entity_name,"instruction_address",instruction_address);
ignored := visualise(entity_name,"instruction_pc",instruction_pc);
ignored := visualise(entity_name,"ilen_buffer",ilen_buffer);
ignored := visualise(entity_name,"byte_buffer",byte_buffer);
ignored := visualise(entity_name,"bytes_ready",bytes_ready);
ignored := visualise(entity_name,"buffer_address",buffer_address);
ignored := visualise(entity_name,"fetch_address",fetch_address);
ignored := visualise(entity_name,"burst_fetch",burst_fetch);
ignored := visualise(entity_name,"dispatched_bytes",dispatched_bytes);
ignored := visualise(entity_name,"desired_address",desired_address);
ignored := visualise(entity_name,"ifetch_transaction_counter",ifetch_transaction_counter);
ignored := visualise(entity_name,"ifetch_expected_transaction_counter",ifetch_expected_transaction_counter);
ignored := visualise(entity_name,"last_vector_fetch_transaction_id",last_vector_fetch_transaction_id);
ignored := visualise(entity_name,"vector_fetch_stall_buffer_occupied",vector_fetch_stall_buffer_occupied);
ignored := visualise(entity_name,"vector_fetch_stall_buffer_transaction_id",vector_fetch_stall_buffer_transaction_id);
ignored := visualise(entity_name,"vector_fetch_stall_buffer_address",vector_fetch_stall_buffer_address);
ignored := visualise(entity_name,"fetch_buffer_1",fetch_buffer_1);
ignored := visualise(entity_name,"fetch_buffer_now",fetch_buffer_now);
ignored := visualise(entity_name,"fetch_buffer_now_valid",fetch_buffer_now_valid);
ignored := visualise(entity_name,"opcode_high_bit",opcode_high_bit);
ignored := visualise(entity_name,"fetched_last_cycle",fetched_last_cycle);
ignored := visualise(entity_name,"end_of_trace",end_of_trace);
ignored := visualise(entity_name,"space_for_bytes",space_for_bytes);
-- Only mark fetch port in use when we push something new to it.
fetch_port_used := false;
-- Handle memory read pipeline.
-- Second stage: copy address, tag and bytes from fetch_buffer_1 into
-- fetch_buffer_now.
fetch_buffer_now.address <= fetch_buffer_1.address;
fetch_buffer_now.user_flags <= fetch_buffer_1.user_flags;
for i in 0 to 3 loop
fetch_buffer_now.v(i).byte <= fetch_buffer_1.v(i).byte;
end loop;
-- Tag bytes with instruction lengths
-- Each byte is looked up as if it were an opcode, with opcode_high_bit
-- prepended to select the table for the current CPU personality.
for i in 0 to 3 loop
fetch_buffer_now.v(i).ilen
<= instruction_lengths
.instruction_length(opcode_high_bit&fetch_buffer_1.v(i).byte(7 downto 0));
end loop;
-- Work out if this is the thing we will want next cycle?
-- Bit 5 of user_flags is used to indicate if it is an instruction fetch
-- or indirect address resolution. 0 = instruction fetch, which is what
-- we care about here.
-- The word in fetch_buffer_1 is accepted when it carries our core tag, no
-- redirect is in progress, it is an instruction fetch, and its transaction
-- number equals the expected counter + 1 if fetch_buffer_now is currently
-- valid, or the expected counter itself otherwise.
-- (Presumably the +1 accounts for the currently valid word advancing the
-- expected counter in this same cycle -- TODO confirm.)
if (std_logic_vector(to_unsigned(coreid+1,2))
= fetch_buffer_1.user_flags(7 downto 6))
and (address_redirecting = false)
and ((fetch_buffer_now_valid
and fetch_buffer_1.user_flags(5) = '0'
and fetch_buffer_1.user_flags(4 downto 0)
= std_logic_vector(ifetch_expected_transaction_counter + 1))
or ((fetch_buffer_now_valid = false)
and fetch_buffer_1.user_flags(5) = '0'
and fetch_buffer_1.user_flags(4 downto 0)
= std_logic_vector(ifetch_expected_transaction_counter))) then
fetch_buffer_now_valid <= true;
else
fetch_buffer_now_valid <= false;
end if;
-- A response carrying our core tag with bit 5 set is the result of an
-- indirect vector fetch.
if (std_logic_vector(to_unsigned(coreid+1,2))
= fetch_buffer_1.user_flags(7 downto 6))
and (address_redirecting = false)
and fetch_buffer_1.user_flags(5) = '1' then
-- We have a vector -- pass it to the VALIDATE stage
-- NOTE(review): the transaction ID comes from the registered stage
-- (fetch_buffer_1) while the bytes come straight from fetch_port_read --
-- confirm that both refer to the same memory transaction.
vector_fetch_out_transaction_id
<= unsigned(fetch_buffer_1.user_flags(4 downto 0));
vector_fetch_out_bytes <= fetch_port_read.bytes;
end if;
-- First stage: register whatever the memory port presents this cycle.
fetch_buffer_1.address <= fetch_port_read.translated;
fetch_buffer_1.user_flags <= fetch_port_read.user_flags;
for i in 0 to 3 loop
fetch_buffer_1.v(i).byte <= fetch_port_read.bytes(i);
end loop;
-- XXX When changing CPU personality, there is a 1 cycle delay before
-- instruction lengths will be correctly calculated. Should be fine, as
-- we will hold CPU during personality change, anyway via
-- address_redirecting interface, which discards all instruction buffer
-- contents, and prevents it loading more until released.
if current_cpu_personality = CPU6502 then
opcode_high_bit <= '1';
else
opcode_high_bit <= '0';
end if;
-- Start from the current buffer state; by default nothing is consumed.
store_offset := bytes_ready;
consumed_bytes := 0;
new_bytes_ready := bytes_ready;
new_byte_buffer := byte_buffer;
new_ilen_buffer := ilen_buffer;
report "I-FETCH" & integer'image(coreid)
& " : Fetching instruction @ $" & to_hstring(instruction_address)
& ", with " & integer'image(bytes_ready) & " bytes available.";
if stall = false then
-- An instruction is only dispatched once at least 3 bytes are buffered,
-- whatever its actual length.
if bytes_ready < 3 then
instruction_out_valid <= false;
else
-- Work out bytes in instruction, so that we can shift down appropriately.
instruction_out_valid <= true;
consumed_bytes := ilen_buffer(0);
end if;
new_bytes_ready := bytes_ready - consumed_bytes;
-- Debug trace of the bytes at the buffer head; no state is changed here.
case consumed_bytes is
when 1 =>
report "I-FETCH" & integer'image(coreid)
& " $" & to_hstring(instruction_address)
& " : Instruction buffer head contains $"
& to_hstring(byte_buffer(7 downto 0))
& ".";
when 2 =>
report "I-FETCH" & integer'image(coreid)
& " $" & to_hstring(instruction_address)
& " : Instruction buffer head contains $"
& to_hstring(byte_buffer(7 downto 0))
& " $" & to_hstring(byte_buffer(15 downto 8))
& ".";
when others =>
report "I-FETCH" & integer'image(coreid)
& " $" & to_hstring(instruction_address)
& " : Instruction buffer head contains $"
& to_hstring(byte_buffer(7 downto 0))
& " $" & to_hstring(byte_buffer(15 downto 8))
& " $" & to_hstring(byte_buffer(23 downto 16))
& ".";
end case;
end if;
-- Shift buffer down
-- Both buffers move down by consumed_bytes positions; with
-- consumed_bytes = 0 this copies them unchanged.
new_byte_buffer(((BYTE_BUFFER_WIDTH-consumed_bytes)*8-1) downto 0)
:= byte_buffer((BYTE_BUFFER_WIDTH*8-1) downto (consumed_bytes*8));
new_ilen_buffer(0 to (BYTE_BUFFER_WIDTH-consumed_bytes))
:= ilen_buffer(consumed_bytes to BYTE_BUFFER_WIDTH);
-- Update where we will store, and the number of valid bytes left in
-- the buffer.
store_offset := bytes_ready - consumed_bytes;
-- We are reading for the correct address
report "I-FETCH" & integer'image(coreid)
& " : RAM READING $" & to_hstring(fetch_buffer_now.address)
&" - $" & to_hstring(fetch_buffer_now.address+3) &
", stow offset " & integer'image(store_offset) & ", am hoping for $"
& to_hstring(desired_address)
& " address_redirecting=" & boolean'image(address_redirecting)
& ", reset=" & std_logic'image(reset);
burst_sub_one := false;
burst_add_one := false;
-- Normal operation: update the buffer and issue the next memory request.
if (address_redirecting = false) and (reset = '0') then
report "I-FETCH" & integer'image(coreid)
& " : Waiting for Tid=$" & to_hstring(to_unsigned(coreid+1,2)&"0"
&ifetch_expected_transaction_counter)
& ", just saw $" & to_hstring(fetch_buffer_now.user_flags);
if fetch_buffer_now_valid then
-- But make sure we don't over flow our read queue
report "I-FETCH: Found the bytes we were looking for to add to our buffer.";
if space_for_bytes then
report "I-FETCH" & integer'image(coreid)
& " : We have space, so adding to byte_buffer.";
-- Append to the end
-- The four new bytes and their length tags go in directly after the
-- bytes that remain once the head instruction has been removed.
for i in 0 to 3 loop
new_byte_buffer((8*(store_offset+i)+7) downto (8*(store_offset+i)))
:= unsigned(fetch_buffer_now.v(i).byte(7 downto 0));
new_ilen_buffer(store_offset+i) := fetch_buffer_now.v(i).ilen;
end loop;
-- update number of bytes available
new_bytes_ready := bytes_ready - consumed_bytes + 4;
report "I-FETCH" & integer'image(coreid)
& " : Adding 4 to (bytes_ready-consumed_bytes) to calculate new_bytes_ready";
-- Read next 4 bytes: this happens through next block, which has a
-- nice new burst fetch process, to keep the buffer filled.
desired_address <= desired_address + 4;
ifetch_expected_transaction_counter
<= ifetch_expected_transaction_counter + 1;
report "I-FETCH" & integer'image(coreid)
& " : desired_address <= $" & to_hstring(desired_address + 4);
end if;
else
report "I-FETCH" & integer'image(coreid)
& " : Wrong bytes presented : desired_address = $" & to_hstring(desired_address)
& ", fetch_buffer_now.address=$" & to_hstring(fetch_buffer_now.address);
end if;
-- Keep the instruction buffer as full as possible, without overflowing.
-- Once 4 or more bytes have been dispatched, take 4 off the count and
-- flag that one more instruction word should be requested.
if dispatched_bytes < 4 then
dispatched_bytes <= dispatched_bytes + consumed_bytes;
else
dispatched_bytes <= dispatched_bytes + consumed_bytes - 4;
burst_add_one := true;
report "I-FETCH" & integer'image(coreid)
& " : Ate 4 bytes, queuing next instruction word read.";
end if;
report "I-FETCH" & integer'image(coreid)
& " : burst_fetch = " & integer'image(burst_fetch)
& ", burst_add_one = " & boolean'image(burst_add_one)
& ", burst_sub_one = " & boolean'image(burst_sub_one);
-- Memory requests are only issued while burst credit remains and the
-- current trace has not ended in an unconditional branch.
-- NOTE(review): within this branch burst_fetch is only ever decremented,
-- so a burst_add_one raised in a cycle where no instruction fetch is
-- issued (port not ready, or a vector fetch takes the port) is not
-- recorded -- confirm this is intended.
if (burst_fetch > 0) and (not end_of_trace)
and (not address_redirecting) then
report "I-FETCH" & integer'image(coreid)
& " : Requesting next instruction word (" & integer'image(burst_fetch)
& " more to go, ready="
& boolean'image(fetch_port_read.ready) &
").";
-- NOTE(review): vector_fetch_transaction_id is a process variable that
-- is only assigned further down (stall buffer / live input selection),
-- so the comparisons in this if-statement see the value chosen the last
-- time that selection ran, not this cycle's request -- confirm intended.
if vector_fetch_transaction_id = last_vector_fetch_transaction_id
or fetch_port_read.ready or (primary_core_boost and (coreid=0)) then
-- After this cycle, we will be able to accept another indirect vector
-- request. Else not.
if vector_fetch_stall_buffer_occupied = false then
prefetch_ready_to_accept_vector_request <= true;
else
vector_fetch_stall_buffer_occupied <= false;
end if;
else
-- Port not available and a vector request is outstanding: refuse further
-- requests and park the incoming one in the stall buffer if it is free.
prefetch_ready_to_accept_vector_request <= false;
if (to_integer(vector_fetch_transaction_id) /= to_integer(last_vector_fetch_transaction_id))
and vector_fetch_stall_buffer_occupied = false then
vector_fetch_stall_buffer_occupied <= true;
vector_fetch_stall_buffer_address <= vector_fetch_address_in;
vector_fetch_stall_buffer_transaction_id <= vector_fetch_transaction_id_in;
end if;
end if;
-- Select the vector request to issue: a parked request takes priority
-- over the live inputs.
if vector_fetch_stall_buffer_occupied then
vector_fetch_address := vector_fetch_stall_buffer_address;
vector_fetch_transaction_id := vector_fetch_stall_buffer_transaction_id;
else
vector_fetch_address := vector_fetch_address_in;
vector_fetch_transaction_id := vector_fetch_transaction_id_in;
end if;
-- A vector fetch takes priority over an instruction fetch when the port
-- is available.
if (to_integer(vector_fetch_transaction_id) /= to_integer(last_vector_fetch_transaction_id))
and (fetch_port_read.ready or (primary_core_boost and (coreid=0))) then
-- Indirect vector fetch
report "FETCH" & integer'image(coreid)
& " port ready: Asking for INDIRECT VECTOR at $"
& to_hstring(vector_fetch_address)
& " as Tid $" & to_hstring(to_unsigned(coreid+1,2)&"1"&
vector_fetch_transaction_id)
& " (last vector Tid was $" & to_hstring(to_unsigned(coreid+1,2)&"1"&
last_vector_fetch_transaction_id)
& ").";
last_vector_fetch_transaction_id <= vector_fetch_transaction_id;
fetch_port_write.valid <= true;
fetch_port_write.translated
<= vector_fetch_address;
-- Put our Core ID in the upper bits
fetch_port_write.user_flags(7 downto 6) <=
std_logic_vector(to_unsigned(coreid+1,2));
-- Mark transaction as being vector fetch
fetch_port_write.user_flags(5) <= '1';
-- And finally the transaction number.
fetch_port_write.user_flags(4 downto 0)
<= std_logic_vector(vector_fetch_transaction_id);
fetch_port_used := true;
elsif fetch_port_read.ready or (primary_core_boost and (coreid=0)) then
-- Instruction fetch of the word following the last one requested.
report "FETCH" & integer'image(coreid)
& " port ready: Asking for $"
& to_hstring(fetch_address + 4)
& " as Tid $" & to_hstring(to_unsigned(coreid+1,2)&"0"&
ifetch_transaction_counter);
fetch_port_write.valid <= true;
fetch_port_write.translated <= fetch_address + 4;
-- Put our Core ID in the upper bits
fetch_port_write.user_flags(7 downto 6) <=
std_logic_vector(to_unsigned(coreid+1,2));
-- Mark transaction as being instruction fetch
fetch_port_write.user_flags(5) <= '0';
-- And finally the transaction number.
fetch_port_write.user_flags(4 downto 0)
<= std_logic_vector(ifetch_transaction_counter);
-- Now update our transaction numbers
ifetch_transaction_counter <= ifetch_transaction_counter + 1;
fetch_address <= fetch_address + 4;
fetch_port_used := true;
-- One burst credit is spent, unless a new credit was earned this cycle,
-- in which case the two cancel out.
if (burst_add_one = false) then
report "I-FETCH" & integer'image(coreid)
& " : Decrementing burst_fetch to "
& integer'image(burst_fetch-1) & ", fetching $"
& to_hstring(fetch_address + 4) & ", desired_address=$"
& to_hstring(desired_address);
burst_fetch <= burst_fetch - 1;
else
report "I-FETCH" & integer'image(coreid)
& " : Holding burst_fetch";
end if;
else
report "I-FETCH" & integer'image(coreid)
& " : FETCH port NOT ready, so holding burst_fetch";
end if;
elsif (burst_add_one = true) then
-- Nothing could be requested this cycle: bank the new credit, saturating
-- at the maximum burst length.
report "I-FETCH" & integer'image(coreid)
& " : Incrementing burst_fetch to " & integer'image(burst_fetch + 1);
if burst_fetch < (BYTE_BUFFER_WIDTH/4+1) then
burst_fetch <= burst_fetch + 1;
end if;
end if;
-- Make sure that we don't get stuck forever waiting for bytes
-- XXX This should never be needed, and just results in a core placing
-- unnecessary demands on the memory bandwidth
-- if (bytes_ready < 4) and (burst_fetch = 0) then
-- report "I-FETCH" & integer'image(coreid)
-- & " : Empty buffer: resetting burst_fetch to " & integer'image((BYTE_BUFFER_WIDTH/4+1));
-- burst_fetch <= (BYTE_BUFFER_WIDTH/4+1);
-- end if;
report "I-FETCH" & integer'image(coreid)
& " buffer was " & to_hstring(byte_buffer)
&", now " & to_hstring(new_byte_buffer)
&", with " & integer'image(new_bytes_ready)
& " bytes ready, " & integer'image(consumed_bytes) & " bytes consumed "
& "(burst_fetch = " & integer'image(burst_fetch) & ").";
-- Commit the shifted (and possibly extended) buffers.
byte_buffer <= new_byte_buffer;
ilen_buffer <= new_ilen_buffer;
bytes_ready <= new_bytes_ready;
-- Room for another word next cycle only if at least 4 bytes plus some
-- slack remain free.
if new_bytes_ready < (BYTE_BUFFER_WIDTH-4) then
space_for_bytes <= true;
else
space_for_bytes <= false;
end if;
buffer_address <= buffer_address + consumed_bytes;
report "I-FETCH" & integer'image(coreid)
& " : Updated: new_bytes_ready = " & integer'image(new_bytes_ready)
& ", consumed_bytes = " & integer'image(consumed_bytes);
-- Advance to the instruction that is now at the buffer head.
instruction_address <= instruction_address + consumed_bytes;
instruction_pc <= instruction_pc + consumed_bytes;
end if;
-- Redirection: discard the buffer and restart fetching at the new address.
if (address_redirecting = true) and (reset = '0') then
report "$" & to_hstring(instruction_address) &
" PREFETCH" & integer'image(coreid)
& " : "
& "redirection requested to $" & to_hstring(redirected_address);
-- Starting to pursue a new trace, fetch until we hit a non-conditional
-- branch.
end_of_trace <= false;
instruction_address <= redirected_address;
instruction_pc(15 downto 8) <= redirected_pch;
instruction_pc(7 downto 0) <= redirected_address(7 downto 0);
-- Invalidate current buffer
bytes_ready <= 0;
space_for_bytes <= true;
-- And remember that we can fetch several words at once.
-- Enough to fill, plus one waiting in the wings.
burst_fetch <= (BYTE_BUFFER_WIDTH / 4) + 1;
-- And reset the bytes eaten counter that we use to decide when to load
-- the next word.
dispatched_bytes <= 0;
-- Start reading from this address.
-- Clobber any other address we have asked for, as anything else we
-- were waiting for is now redundant.
-- Note that the request is written without checking fetch_port_read.ready.
fetch_port_write.valid <= true;
fetch_port_write.translated <= redirected_address;
fetch_port_used := true;
fetch_port_write.user_flags <=
std_logic_vector(to_unsigned(coreid+1,2)&"0"&
ifetch_transaction_counter);
report "FETCH" & integer'image(coreid)
& " port ready: Due to redirection, asking for $"
& to_hstring(redirected_address)
& " as Tid $" & to_hstring(to_unsigned(coreid+1,2)&"0"&
ifetch_transaction_counter);
-- Set the transaction ID we will be waiting for for this data
ifetch_expected_transaction_counter <= ifetch_transaction_counter;
ifetch_transaction_counter <= ifetch_transaction_counter + 1;
fetch_address <= redirected_address;
desired_address <= redirected_address;
report "I-FETCH" & integer'image(coreid)
& " : desired_address <= $" & to_hstring(redirected_address);
else
-- Otherwise, keep fetching from where we were.
end if;
-- Work out whether we can request more instructions next cycle?
-- This approach avoids the need for any buffers, but it does mean that
-- we can only fetch an instruction every other cycle. However, for Core0,
-- we know that we have the highest priority access to the memory controller,
-- so we can always fetch.
-- For the other cores, this means that we are limited to fetching at
-- most 2 instruction bytes per cycle on average (4 per 2 cycles), so
-- there will be some pipeline stalling due to fetch delays on those
-- cores if they use too many 3-byte instructions in rapid succession.
-- This could be avoided by buffering the fetch ports 1 - 3, so that we
-- get a one-cycle warning, and can stop fetching when we know that it is
-- busy. The trade-off would then be one extra cycle of instruction
-- fetch latency on ports 1-3. We can work out the best trade-off there
-- later.
fetched_last_cycle <= fetch_port_used;
-- The boosted core 0 drops valid in any cycle where it pushed no new
-- request; in every other configuration valid is only cleared here by
-- reset.
if (coreid = 0) and primary_core_boost then
if (not fetch_port_used) or (reset='1') then
fetch_port_write.valid <= false;
end if;
elsif reset='1' then
fetch_port_write.valid <= false;
end if;
report "$" & to_hstring(instruction_address) & " I-FETCH" & integer'image(coreid);
-- Address of the following instruction.
-- NOTE(review): this is derived from the low 16 bits of
-- instruction_address rather than from instruction_pc -- confirm that the
-- two always agree.
next_pc := to_unsigned(to_integer(instruction_address(15 downto 0)) + consumed_bytes,16);
-- Decode the opcode at the buffer head, prepending "1" for the 6502
-- personality and "0" otherwise to select the lookup table.
if current_cpu_personality = CPU6502 then
instruction.instruction_flags
:= get_instruction_flags("1"&std_logic_vector(byte_buffer(7 downto 0)));
instruction.instruction_extra_flags
:= get_extra_instruction_flags("1"&std_logic_vector(byte_buffer(7 downto 0)));
instruction.addressing_mode
:= get_addressing_modes("1"&std_logic_vector(byte_buffer(7 downto 0)));
else
instruction.instruction_flags
:= get_instruction_flags("0"&std_logic_vector(byte_buffer(7 downto 0)));
instruction.instruction_extra_flags
:= get_extra_instruction_flags("0"&std_logic_vector(byte_buffer(7 downto 0)));
instruction.addressing_mode
:= get_addressing_modes("0"&std_logic_vector(byte_buffer(7 downto 0)));
end if;
instruction.modifies_cpu_personality := false;
instruction.cpu_personality := current_cpu_personality;
instruction.bytes.opcode := byte_buffer(7 downto 0);
instruction.bytes.arg1 := byte_buffer(15 downto 8);
if consumed_bytes /= 3 then
-- Set upper byte of address field to B register, so that we can treat
-- ZP and ABS addressing modes equivalently. (Also gives us the option
-- of having another CPU personality that allows (ABS),Y etc).
instruction.bytes.arg2 := regs.b;
else
instruction.bytes.arg2 := byte_buffer(23 downto 16);
end if;
-- Coming after the fetch logic above, these assignments override any
-- burst_fetch update made earlier in this cycle.  The test looks at the
-- decoded buffer head only; it does not check bytes_ready or stall.
if (address_redirecting = false) and
(instruction.instruction_flags.do_branch
and (not instruction.instruction_flags.do_branch_conditional)) then
-- Found a non-conditional branch instruction, so there is no point continuing
-- to burst fetch
burst_fetch <= 0;
end_of_trace <= true;
end if;
if bytes_ready > 2 then
-- If we have more than 2 bytes, we know we have enough for any instruction
instruction.translated := instruction_address;
report "FETCH" & integer'image(coreid)
& " setting instruction.translated to $ " & to_hstring(instruction_address);
else
-- Magic value so that we can see when there were insufficient bytes.
-- XXX Should be (others => '1') when testing complete.
instruction.translated := x"FF0B1435";
report "FETCH" & integer'image(coreid)
& " invalidating instruction.translated";
end if;
-- No branch prediction is made here: both the expected and the mispredict
-- PC are set to the address of the next sequential instruction.
instruction.pc := instruction_pc;
instruction.pc_expected := next_pc;
instruction.pc_mispredict := next_pc;
instruction.branch_predict := false;
if stall = false then
-- Work out possible PC values for JMP/JSR, as well as 8 and 16 bit
-- branch options.
-- The constants 65538 and 65539 are 65536 + 2 and 65536 + 3; the 65536
-- part vanishes when to_unsigned reduces the sum to 16 bits.
-- 8-bit relative branch: PC + 2 + second instruction byte sign-extended
-- to 16 bits.
branch8_pc <=
to_unsigned(65538 + to_integer(instruction_pc) + to_integer(
byte_buffer(15)&byte_buffer(15)&byte_buffer(15)&byte_buffer(15)&
byte_buffer(15)&byte_buffer(15)&byte_buffer(15)&byte_buffer(15)&
byte_buffer(15 downto 8)),16);
-- 16-bit relative branch: PC + 3 + the 16-bit value in bytes 2 and 3.
branch16_pc <=
to_unsigned(65539 + to_integer(instruction_pc) + to_integer(byte_buffer(23 downto 8)),16);
-- For those bizarre BBR/BBS instructions, where the branch is from the
-- 3rd byte of the instruction, not the 2nd
branch8_zp_pc <=
to_unsigned(65539 + to_integer(instruction_pc) + to_integer(
byte_buffer(23)&byte_buffer(23)&byte_buffer(23)&byte_buffer(23)&
byte_buffer(23)&byte_buffer(23)&byte_buffer(23)&byte_buffer(23)&
byte_buffer(23 downto 16)),16);
instruction_out <= instruction;
end if;
end if;
end process;
end behavioural;