Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,18 @@ inline std::optional<int64_t> matchConstant(Register Reg,
return getIConstantVRegSExtVal(Reg, MRI);
}

/// matchConstant specialization yielding a ValueAndVReg.
///
/// Forwards \p Reg and \p MRI to getIConstantVRegValWithLookThrough and hands
/// its result back unchanged.
template <>
inline std::optional<ValueAndVReg>
matchConstant(Register Reg, const MachineRegisterInfo &MRI) {
  std::optional<ValueAndVReg> Found =
      getIConstantVRegValWithLookThrough(Reg, MRI);
  return Found;
}

/// matchConstant specialization yielding an FPValueAndVReg.
///
/// Forwards \p Reg and \p MRI to getFConstantVRegValWithLookThrough and hands
/// its result back unchanged.
template <>
inline std::optional<FPValueAndVReg>
matchConstant(Register Reg, const MachineRegisterInfo &MRI) {
  std::optional<FPValueAndVReg> Found =
      getFConstantVRegValWithLookThrough(Reg, MRI);
  return Found;
}

template <typename ConstT> struct ConstantMatch {
ConstT &CR;
ConstantMatch(ConstT &C) : CR(C) {}
Expand Down
120 changes: 35 additions & 85 deletions llvm/lib/Target/Z80/GISel/Z80InstructionSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1992,119 +1992,69 @@ bool Z80InstructionSelector::selectAddSub(MachineInstr &I,
bool Is24Bit = (TySize == 24);
bool IsSub = (Opc == TargetOpcode::G_SUB);

MachineIRBuilder MIB(I);

// INC/DEC optimization: For ±1, use INC/DEC which accept R16/R24 class
// (any register) instead of requiring the accumulator class.
auto ConstSrc2 = getIConstantVRegValWithLookThrough(Src2Reg, MRI);
if (ConstSrc2) {
bool UseInc = false;
bool UseDec = false;

if (IsSub) {
if (ConstSrc2->Value.isOne())
UseDec = true;
else if (ConstSrc2->Value.isAllOnes())
UseInc = true;
if (ConstSrc2->Value.isOne()) UseDec = true;
else if (ConstSrc2->Value.isAllOnes()) UseInc = true;
} else {
if (ConstSrc2->Value.isOne())
UseInc = true;
else if (ConstSrc2->Value.isAllOnes())
UseDec = true;
if (ConstSrc2->Value.isOne()) UseInc = true;
else if (ConstSrc2->Value.isAllOnes()) UseDec = true;
}

if (UseInc || UseDec) {
unsigned IncDecOpc;
const TargetRegisterClass *RC;
if (Is24Bit) {
IncDecOpc = UseInc ? Z80::INC24r : Z80::DEC24r;
RC = &Z80::R24RegClass;
} else {
IncDecOpc = UseInc ? Z80::INC16r : Z80::DEC16r;
RC = &Z80::R16RegClass;
}

MachineIRBuilder MIB(I);
unsigned IncDecOpc = Is24Bit ? (UseInc ? Z80::INC24r : Z80::DEC24r)
: (UseInc ? Z80::INC16r : Z80::DEC16r);
const TargetRegisterClass *RC = Is24Bit ? &Z80::R24RegClass : &Z80::R16RegClass;
auto IncDecI = MIB.buildInstr(IncDecOpc, {DstReg}, {Src1Reg});
if (!RBI.constrainGenericRegister(DstReg, *RC, MRI))
return false;
if (!RBI.constrainGenericRegister(Src1Reg, *RC, MRI))
if (!RBI.constrainGenericRegister(DstReg, *RC, MRI) ||
!RBI.constrainGenericRegister(Src1Reg, *RC, MRI) ||
!constrainSelectedInstRegOperands(*IncDecI, TII, TRI, RBI))
return false;
I.eraseFromParent();
return constrainSelectedInstRegOperands(*IncDecI, TII, TRI, RBI);
return true;
}
}

// Determine physical accumulator register and operand register class
Register PhysAccum = Is24Bit ? Z80::UHL : Z80::HL;
// Determine accumulator and operand register classes
const TargetRegisterClass *AccumRC = Is24Bit ? &Z80::A24RegClass : &Z80::A16RegClass;
const TargetRegisterClass *OperandRC = Is24Bit ? &Z80::O24RegClass : &Z80::O16RegClass;

MachineBasicBlock &MBB = *I.getParent();
const DebugLoc &DL = I.getDebugLoc();
MachineIRBuilder MIB(I);

// Handle Src1: We want to avoid constraining Src1 to AccumRC (HL) directly,
// as this propagates back to loads and forces them to target HL, increasing
// register pressure.
//
// Strategy: Try to constrain Src1 to OperandRC (BC/DE) first. If successful,
// we create a COPY to a new vreg in AccumRC for the operation. If it fails
// (e.g., Src1 is already in a class that doesn't intersect OperandRC),
// fall back to constraining to AccumRC directly.
Register ActualSrc1;
if (RBI.constrainGenericRegister(Src1Reg, *OperandRC, MRI)) {
// Src1 constrained to OperandRC (BC/DE) - need COPY to HL for operation
ActualSrc1 = MRI.createVirtualRegister(AccumRC);
MIB.buildCopy(ActualSrc1, Src1Reg);
} else if (RBI.constrainGenericRegister(Src1Reg, *AccumRC, MRI)) {
// Src1 already compatible with AccumRC (HL) - use directly
ActualSrc1 = Src1Reg;
} else {
// Neither worked - create COPY to AccumRC
ActualSrc1 = MRI.createVirtualRegister(AccumRC);
MIB.buildCopy(ActualSrc1, Src1Reg);
}

// Handle Src2: needs to be in OperandRC (O16/O24)
Register ActualSrc2 = Src2Reg;
if (!RBI.constrainGenericRegister(Src2Reg, *OperandRC, MRI)) {
ActualSrc2 = MRI.createVirtualRegister(OperandRC);
MIB.buildCopy(ActualSrc2, Src2Reg);
}

// Handle Dst: needs to be in AccumRC (A16/A24)
Register ActualDst = DstReg;
bool NeedDstCopy = !RBI.constrainGenericRegister(DstReg, *AccumRC, MRI);
if (NeedDstCopy) {
ActualDst = MRI.createVirtualRegister(AccumRC);
}

// Step 1: COPY Src1 -> physical accumulator (HL/UHL)
BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), PhysAccum)
.addReg(ActualSrc1);

// Step 2: Emit the arithmetic instruction

if (IsSub) {
// Use Sub16ao/Sub24ao pseudo which encapsulates carry-clearing internally.
// This gives the register allocator better liveness information vs
// explicit SCF+CCF+SBC sequence which was creating extra spill slots.
unsigned SubOpc = Is24Bit ? Z80::Sub24ao : Z80::Sub16ao;
BuildMI(MBB, I, DL, TII.get(SubOpc))
.addReg(ActualSrc2);
// for subtraction, we MUST use HL/UHL accumulator
Register PhysAccum = Is24Bit ? Z80::UHL : Z80::HL;
// HL/UHL = COPY Src1
MIB.buildCopy(PhysAccum, Src1Reg);
// Sub Pseudo (implicit use/def HL/UHL)
auto SubI = MIB.buildInstr(Is24Bit ? Z80::Sub24ao : Z80::Sub16ao).addReg(ActualSrc2);
if (!constrainSelectedInstRegOperands(*SubI, TII, TRI, RBI))
return false;
// Dst = COPY HL/UHL
// the destination can be any register in the general class
const TargetRegisterClass *RegRC = Is24Bit ? &Z80::R24RegClass : &Z80::R16RegClass;
MIB.buildCopy(DstReg, PhysAccum);
if (!RBI.constrainGenericRegister(DstReg, *RegRC, MRI))
return false;
} else {
// ADD16ao/ADD24ao: dst = src1 + src2
// for addition, use virtual registers in A16/A24 to allow RA to pick IX/IY
Register ActualSrc1 = MRI.createVirtualRegister(AccumRC);
MIB.buildCopy(ActualSrc1, Src1Reg);
unsigned AddOpc = Is24Bit ? Z80::ADD24ao : Z80::ADD16ao;
BuildMI(MBB, I, DL, TII.get(AddOpc), PhysAccum)
.addReg(PhysAccum)
.addReg(ActualSrc2);
}

// Step 3: COPY physical accumulator -> ActualDst
BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), ActualDst)
.addReg(PhysAccum);

// Step 4: If we couldn't constrain DstReg directly, COPY ActualDst -> DstReg
if (NeedDstCopy) {
MIB.buildCopy(DstReg, ActualDst);
auto AddI = MIB.buildInstr(AddOpc, {DstReg}, {ActualSrc1, ActualSrc2});
if (!constrainSelectedInstRegOperands(*AddI, TII, TRI, RBI))
return false;
}

I.eraseFromParent();
Expand Down
19 changes: 19 additions & 0 deletions llvm/lib/Target/Z80/GISel/Z80RegisterBankInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,26 @@ Z80RegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
if (Z80::R8RegClass.hasSubClassEq(&RC) ||
Z80::R16RegClass.hasSubClassEq(&RC) ||
Z80::R24RegClass.hasSubClassEq(&RC) ||
Z80::G8RegClass.hasSubClassEq(&RC) ||
Z80::G16RegClass.hasSubClassEq(&RC) ||
Z80::G24RegClass.hasSubClassEq(&RC) ||
Z80::O8RegClass.hasSubClassEq(&RC) ||
Z80::O16RegClass.hasSubClassEq(&RC) ||
Z80::O24RegClass.hasSubClassEq(&RC) ||
Z80::A16RegClass.hasSubClassEq(&RC) ||
Z80::A24RegClass.hasSubClassEq(&RC) ||
Z80::I8RegClass.hasSubClassEq(&RC) ||
Z80::I16RegClass.hasSubClassEq(&RC) ||
Z80::I24RegClass.hasSubClassEq(&RC) ||
Z80::X8RegClass.hasSubClassEq(&RC) ||
Z80::X16RegClass.hasSubClassEq(&RC) ||
Z80::X24RegClass.hasSubClassEq(&RC) ||
Z80::Y8RegClass.hasSubClassEq(&RC) ||
Z80::Y16RegClass.hasSubClassEq(&RC) ||
Z80::Y24RegClass.hasSubClassEq(&RC) ||
Z80::F8RegClass.hasSubClassEq(&RC) ||
Z80::HL16RegClass.hasSubClassEq(&RC) ||
Z80::HL24RegClass.hasSubClassEq(&RC) ||
Z80::Z8RegClass.hasSubClassEq(&RC) ||
Z80::Z16RegClass.hasSubClassEq(&RC) ||
Z80::Z24RegClass.hasSubClassEq(&RC))
Expand Down
7 changes: 5 additions & 2 deletions llvm/lib/Target/Z80/Z80InstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1198,7 +1198,7 @@ bool Z80InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
const TargetRegisterInfo &TRI = getRegisterInfo();
bool Is24Bit = Subtarget.is24Bit();
bool UseLEA = Is24Bit && !MF.getFunction().hasOptSize();
LLVM_DEBUG(dbgs() << "\nZ80InstrInfo::expandPostRAPseudo:"; MI.dump());

switch (unsigned Opc = MI.getOpcode()) {
default:
return false;
Expand Down Expand Up @@ -1287,7 +1287,10 @@ bool Z80InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
}
case Z80::Sub16ao:
case Z80::Sub24ao:
expandPostRAPseudo(*BuildMI(MBB, MI, DL, get(Z80::RCF)));
// directly emit OR A, A to clear carry flag
BuildMI(MBB, MI, DL, get(Z80::OR8ar)).addReg(Z80::A, RegState::Undef)
.addReg(Z80::A, RegState::ImplicitDefine);

MI.setDesc(get(Opc == Z80::Cmp24ao || Opc == Z80::Sub24ao ? Z80::SBC24ao
: Z80::SBC16ao));
MIB.addReg(Z80::F, RegState::Implicit);
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/Z80/Z80RegisterInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,8 @@ def A16 : Z80RC16<(add HL, I16)> {
let AllocationPriority = 16;
let GlobalPriority = true;
}
def HL16 : Z80RC16<(add HL)>;
def HL24 : Z80RC24<(add UHL)>;
def R16 : Z80RC16<(add G16, I16)>;
let CopyCost = -1 in
def Z16 : Z80RC16<(add SPS, AF, UI)>;
Expand Down
20 changes: 0 additions & 20 deletions llvm/test/CodeGen/Z80/add24-phi-accum.mir

This file was deleted.

14 changes: 7 additions & 7 deletions llvm/test/CodeGen/Z80/control.ll
Original file line number Diff line number Diff line change
Expand Up @@ -386,23 +386,23 @@ define i8 @switch(i8) {
; EZ80-NEXT: ld iy, 0
; EZ80-NEXT: add iy, sp
; EZ80-NEXT: ld a, (iy + 3)
; EZ80-NEXT: ld c, 0
; EZ80-NEXT: ld de, 0
; EZ80-NEXT: ld e, 0
; EZ80-NEXT: ld bc, 0
; EZ80-NEXT: cp a, 4
; EZ80-NEXT: jr c, BB11_2
; EZ80-NEXT: ; %bb.1:
; EZ80-NEXT: ld a, -1
; EZ80-NEXT: ret
; EZ80-NEXT: BB11_2:
; EZ80-NEXT: ld e, a
; EZ80-NEXT: ld c, a
; EZ80-NEXT: ld hl, JTI11_0
; EZ80-NEXT: add hl, de
; EZ80-NEXT: add hl, de
; EZ80-NEXT: add hl, de
; EZ80-NEXT: add hl, bc
; EZ80-NEXT: add hl, bc
; EZ80-NEXT: add hl, bc
; EZ80-NEXT: ld hl, (hl)
; EZ80-NEXT: jp (hl)
; EZ80-NEXT: BB11_3:
; EZ80-NEXT: ld a, c
; EZ80-NEXT: ld a, e
; EZ80-NEXT: ret
; EZ80-NEXT: BB11_4:
; EZ80-NEXT: ld a, 2
Expand Down
Loading