Optimize HybridAllocator (#2637)

* Store constant `Operand`s in the `LocalInfo`

Since the spill slot and register assigned is fixed, we can just store
the `Operand` reference in the `LocalInfo` struct. This allows skipping
hitting the intern-table for a look up.

* Skip `Uses`/`Assignments` management

Since the `HybridAllocator` is the last pass and we do not care about
uses/assignments we can skip managing that when setting destinations or
sources.

* Make `GetLocalInfo` inlineable

Also fix a possible issue where with numbered locals. See or-assignment
operator in `SetVisited(local)` before patch.

* Do not run `BlockPlacement` in LCQ

With the host mapped memory manager, there is a lot less cold code to
split from hot code. So disabling this in LCQ gives some extra
throughput - where we need it.

* Address Mou-Ikkai's feedback

* Apply suggestions from code review

Co-authored-by: VocalFan <45863583+Mou-Ikkai@users.noreply.github.com>

* Move check to an assert

Co-authored-by: VocalFan <45863583+Mou-Ikkai@users.noreply.github.com>
This commit is contained in:
FICTURE7 2021-09-29 03:38:37 +04:00 committed by GitHub
parent 1ae690ba2f
commit 312be74861
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 85 additions and 83 deletions

View File

@ -3,6 +3,7 @@ using ARMeilleure.Translation;
using System;
using System.Diagnostics;
using System.Numerics;
using System.Runtime.CompilerServices;
using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
using static ARMeilleure.IntermediateRepresentation.Operation.Factory;
@ -10,9 +11,6 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
{
class HybridAllocator : IRegisterAllocator
{
private const int RegistersCount = 16;
private const int MaxIROperands = 4;
private struct BlockInfo
{
public bool HasCall { get; }
@ -32,10 +30,10 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
{
public int Uses { get; set; }
public int UsesAllocated { get; set; }
public int Register { get; set; }
public int SpillOffset { get; set; }
public int Sequence { get; set; }
public Operand Temp { get; set; }
public Operand Register { get; set; }
public Operand SpillOffset { get; set; }
public OperandType Type { get; }
private int _first;
@ -49,10 +47,10 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
Type = type;
UsesAllocated = 0;
Register = 0;
SpillOffset = 0;
Sequence = 0;
Temp = default;
Register = default;
SpillOffset = default;
_first = -1;
_last = -1;
@ -74,6 +72,38 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
}
}
private const int MaxIROperands = 4;
// The "visited" state is stored in the MSB of the local's value.
private const ulong VisitedMask = 1ul << 63;
private BlockInfo[] _blockInfo;
private LocalInfo[] _localInfo;
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static bool IsVisited(Operand local)
{
Debug.Assert(local.Kind == OperandKind.LocalVariable);
return (local.GetValueUnsafe() & VisitedMask) != 0;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static void SetVisited(Operand local)
{
Debug.Assert(local.Kind == OperandKind.LocalVariable);
local.GetValueUnsafe() |= VisitedMask;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private ref LocalInfo GetLocalInfo(Operand local)
{
Debug.Assert(local.Kind == OperandKind.LocalVariable);
Debug.Assert(IsVisited(local), "Local variable not visited. Used before defined?");
return ref _localInfo[(uint)local.GetValueUnsafe() - 1];
}
public AllocationResult RunPass(ControlFlowGraph cfg, StackAllocator stackAlloc, RegisterMasks regMasks)
{
int intUsedRegisters = 0;
@ -82,35 +112,11 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
int intFreeRegisters = regMasks.IntAvailableRegisters;
int vecFreeRegisters = regMasks.VecAvailableRegisters;
var blockInfo = new BlockInfo[cfg.Blocks.Count];
var localInfo = new LocalInfo[cfg.Blocks.Count * 3];
_blockInfo = new BlockInfo[cfg.Blocks.Count];
_localInfo = new LocalInfo[cfg.Blocks.Count * 3];
int localInfoCount = 0;
// The "visited" state is stored in the MSB of the local's value.
const ulong VisitedMask = 1ul << 63;
bool IsVisited(Operand local)
{
return (local.GetValueUnsafe() & VisitedMask) != 0;
}
void SetVisited(Operand local)
{
local.GetValueUnsafe() |= VisitedMask | (uint)++localInfoCount;
}
ref LocalInfo GetLocalInfo(Operand local)
{
Debug.Assert(local.Kind == OperandKind.LocalVariable);
if (!IsVisited(local))
{
throw new InvalidOperationException("Local was not visisted yet. Used before defined?");
}
return ref localInfo[(uint)local.GetValueUnsafe() - 1];
}
for (int index = cfg.PostOrderBlocks.Length - 1; index >= 0; index--)
{
BasicBlock block = cfg.PostOrderBlocks[index];
@ -127,10 +133,8 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
hasCall = true;
}
for (int i = 0; i < node.SourcesCount; i++)
foreach (Operand source in node.SourcesUnsafe)
{
Operand source = node.GetSource(i);
if (source.Kind == OperandKind.LocalVariable)
{
GetLocalInfo(source).SetBlockIndex(block.Index);
@ -151,10 +155,8 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
}
}
for (int i = 0; i < node.DestinationsCount; i++)
foreach (Operand dest in node.DestinationsUnsafe)
{
Operand dest = node.GetDestination(i);
if (dest.Kind == OperandKind.LocalVariable)
{
if (IsVisited(dest))
@ -163,13 +165,14 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
}
else
{
SetVisited(dest);
dest.NumberLocal(++localInfoCount);
if (localInfoCount > localInfo.Length)
if (localInfoCount > _localInfo.Length)
{
Array.Resize(ref localInfo, localInfoCount * 2);
Array.Resize(ref _localInfo, localInfoCount * 2);
}
SetVisited(dest);
GetLocalInfo(dest) = new LocalInfo(dest.Type, UsesCount(dest), block.Index);
}
}
@ -187,7 +190,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
}
}
blockInfo[block.Index] = new BlockInfo(hasCall, intFixedRegisters, vecFixedRegisters);
_blockInfo[block.Index] = new BlockInfo(hasCall, intFixedRegisters, vecFixedRegisters);
}
int sequence = 0;
@ -196,7 +199,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
{
BasicBlock block = cfg.PostOrderBlocks[index];
BlockInfo blkInfo = blockInfo[block.Index];
ref BlockInfo blkInfo = ref _blockInfo[block.Index];
int intLocalFreeRegisters = intFreeRegisters & ~blkInfo.IntFixedRegisters;
int vecLocalFreeRegisters = vecFreeRegisters & ~blkInfo.VecFixedRegisters;
@ -227,23 +230,23 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
Debug.Assert(info.UsesAllocated <= info.Uses);
if (info.Register != -1)
if (info.Register != default)
{
Operand reg = Register(info.Register, local.Type.ToRegisterType(), local.Type);
if (info.UsesAllocated == info.Uses)
{
Register reg = info.Register.GetRegister();
if (local.Type.IsInteger())
{
intLocalFreeRegisters |= 1 << info.Register;
intLocalFreeRegisters |= 1 << reg.Index;
}
else
{
vecLocalFreeRegisters |= 1 << info.Register;
vecLocalFreeRegisters |= 1 << reg.Index;
}
}
return reg;
return info.Register;
}
else
{
@ -259,7 +262,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
info.Temp = temp;
}
Operation fillOp = Operation(Instruction.Fill, temp, Const(info.SpillOffset));
Operation fillOp = Operation(Instruction.Fill, temp, info.SpillOffset);
block.Operations.AddBefore(node, fillOp);
@ -279,9 +282,9 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
{
ref LocalInfo info = ref GetLocalInfo(source);
if (info.Register == -1)
if (info.Register == default)
{
Operation fillOp = Operation(Instruction.Fill, node.Destination, Const(info.SpillOffset));
Operation fillOp = Operation(Instruction.Fill, node.Destination, info.SpillOffset);
block.Operations.AddBefore(node, fillOp);
block.Operations.Remove(node);
@ -295,13 +298,11 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
if (!folded)
{
for (int i = 0; i < node.SourcesCount; i++)
foreach (ref Operand source in node.SourcesUnsafe)
{
Operand source = node.GetSource(i);
if (source.Kind == OperandKind.LocalVariable)
{
node.SetSource(i, AllocateRegister(source));
source = AllocateRegister(source);
}
else if (source.Kind == OperandKind.Memory)
{
@ -323,10 +324,8 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
int intLocalAsg = 0;
int vecLocalAsg = 0;
for (int i = 0; i < node.DestinationsCount; i++)
foreach (ref Operand dest in node.DestinationsUnsafe)
{
Operand dest = node.GetDestination(i);
if (dest.Kind != OperandKind.LocalVariable)
{
continue;
@ -344,7 +343,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
{
int selectedReg = BitOperations.TrailingZeroCount(mask);
info.Register = selectedReg;
info.Register = Register(selectedReg, info.Type.ToRegisterType(), info.Type);
if (dest.Type.IsInteger())
{
@ -359,8 +358,8 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
}
else
{
info.Register = -1;
info.SpillOffset = stackAlloc.Allocate(dest.Type.GetSizeInBytes());
info.Register = default;
info.SpillOffset = Const(stackAlloc.Allocate(dest.Type.GetSizeInBytes()));
}
}
@ -368,9 +367,9 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
Debug.Assert(info.UsesAllocated <= info.Uses);
if (info.Register != -1)
if (info.Register != default)
{
node.SetDestination(i, Register(info.Register, dest.Type.ToRegisterType(), dest.Type));
dest = info.Register;
}
else
{
@ -386,9 +385,9 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
info.Temp = temp;
}
node.SetDestination(i, temp);
dest = temp;
Operation spillOp = Operation(Instruction.Spill, default, Const(info.SpillOffset), temp);
Operation spillOp = Operation(Instruction.Spill, default, info.SpillOffset, temp);
block.Operations.AddAfter(node, spillOp);

View File

@ -97,15 +97,17 @@ namespace ARMeilleure.CodeGen.X86
Logger.StartPass(PassName.Optimization);
if ((cctx.Options & CompilerOptions.SsaForm) != 0 &&
(cctx.Options & CompilerOptions.Optimize) != 0)
if (cctx.Options.HasFlag(CompilerOptions.Optimize))
{
if (cctx.Options.HasFlag(CompilerOptions.SsaForm))
{
Optimizer.RunPass(cfg);
}
X86Optimizer.RunPass(cfg);
BlockPlacement.RunPass(cfg);
}
X86Optimizer.RunPass(cfg);
Logger.EndPass(PassName.Optimization, cfg);
@ -119,14 +121,14 @@ namespace ARMeilleure.CodeGen.X86
Logger.StartPass(PassName.RegisterAllocation);
if ((cctx.Options & CompilerOptions.SsaForm) != 0)
if (cctx.Options.HasFlag(CompilerOptions.SsaForm))
{
Ssa.Deconstruct(cfg);
}
IRegisterAllocator regAlloc;
if ((cctx.Options & CompilerOptions.Lsra) != 0)
if (cctx.Options.HasFlag(CompilerOptions.Lsra))
{
regAlloc = new LinearScanAllocator();
}

View File

@ -146,6 +146,7 @@ namespace ARMeilleure.IntermediateRepresentation
return BitConverter.Int64BitsToDouble((long)Value);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal ref ulong GetValueUnsafe()
{
return ref _data->Value;

View File

@ -53,8 +53,8 @@ namespace ARMeilleure.IntermediateRepresentation
public int DestinationsCount => _data->DestinationsCount;
public int SourcesCount => _data->SourcesCount;
private Span<Operand> Destinations => new(_data->Destinations, _data->DestinationsCount);
private Span<Operand> Sources => new(_data->Sources, _data->SourcesCount);
internal Span<Operand> DestinationsUnsafe => new(_data->Destinations, _data->DestinationsCount);
internal Span<Operand> SourcesUnsafe => new(_data->Sources, _data->SourcesCount);
public PhiOperation AsPhi()
{
@ -65,17 +65,17 @@ namespace ARMeilleure.IntermediateRepresentation
public Operand GetDestination(int index)
{
return Destinations[index];
return DestinationsUnsafe[index];
}
public Operand GetSource(int index)
{
return Sources[index];
return SourcesUnsafe[index];
}
public void SetDestination(int index, Operand dest)
{
ref Operand curDest = ref Destinations[index];
ref Operand curDest = ref DestinationsUnsafe[index];
RemoveAssignment(curDest);
AddAssignment(dest);
@ -85,7 +85,7 @@ namespace ARMeilleure.IntermediateRepresentation
public void SetSource(int index, Operand src)
{
ref Operand curSrc = ref Sources[index];
ref Operand curSrc = ref SourcesUnsafe[index];
RemoveUse(curSrc);
AddUse(src);
@ -274,8 +274,8 @@ namespace ARMeilleure.IntermediateRepresentation
EnsureCapacity(ref result._data->Destinations, ref result._data->DestinationsCount, destCount);
EnsureCapacity(ref result._data->Sources, ref result._data->SourcesCount, srcCount);
result.Destinations.Clear();
result.Sources.Clear();
result.DestinationsUnsafe.Clear();
result.SourcesUnsafe.Clear();
return result;
}