d593fa3b48b88dcac531d5e87b4a513f369dff95
//===-- SIPrepareScratchRegs.cpp - Prepare scratch ptr/offset registers ---===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 ///
12 /// This pass loads scratch pointer and scratch offset into a register or a
13 /// frame index which can be used anywhere in the program. These values will
14 /// be used for spilling VGPRs.
15 ///
16 //===----------------------------------------------------------------------===//
18 #include "AMDGPU.h"
19 #include "AMDGPUSubtarget.h"
20 #include "SIDefines.h"
21 #include "SIInstrInfo.h"
22 #include "SIMachineFunctionInfo.h"
23 #include "llvm/CodeGen/MachineFrameInfo.h"
24 #include "llvm/CodeGen/MachineFunction.h"
25 #include "llvm/CodeGen/MachineFunctionPass.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineRegisterInfo.h"
28 #include "llvm/CodeGen/RegisterScavenging.h"
29 #include "llvm/IR/Function.h"
30 #include "llvm/IR/LLVMContext.h"
32 using namespace llvm;
34 namespace {
/// Machine function pass that materializes the scratch pointer and scratch
/// wave offset for VGPR spilling and rewrites the SI_SPILL_V* pseudo
/// instructions to use them (see runOnMachineFunction).
class SIPrepareScratchRegs : public MachineFunctionPass {

private:
  // Pass identification; the address of this member is the unique pass ID.
  static char ID;

public:
  SIPrepareScratchRegs() : MachineFunctionPass(ID) { }

  bool runOnMachineFunction(MachineFunction &MF) override;

  // Human-readable name shown in pass-manager debug output.
  const char *getPassName() const override {
    return "SI prepare scratch registers";
  }

};
52 } // End anonymous namespace
54 char SIPrepareScratchRegs::ID = 0;
56 FunctionPass *llvm::createSIPrepareScratchRegs() {
57 return new SIPrepareScratchRegs();
58 }
60 bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) {
61 SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
62 const SIInstrInfo *TII =
63 static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
64 const SIRegisterInfo *TRI = &TII->getRegisterInfo();
65 MachineRegisterInfo &MRI = MF.getRegInfo();
66 MachineFrameInfo *FrameInfo = MF.getFrameInfo();
67 MachineBasicBlock *Entry = MF.begin();
68 MachineBasicBlock::iterator I = Entry->begin();
69 DebugLoc DL = I->getDebugLoc();
71 // FIXME: If we don't have enough VGPRs for SGPR spilling we will need to
72 // run this pass.
73 if (!MFI->hasSpilledVGPRs())
74 return false;
76 unsigned ScratchPtrPreloadReg =
77 TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_PTR);
78 unsigned ScratchOffsetPreloadReg =
79 TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_WAVE_OFFSET);
81 if (!Entry->isLiveIn(ScratchPtrPreloadReg))
82 Entry->addLiveIn(ScratchPtrPreloadReg);
84 if (!Entry->isLiveIn(ScratchOffsetPreloadReg))
85 Entry->addLiveIn(ScratchOffsetPreloadReg);
87 // Load the scratch offset.
88 unsigned ScratchOffsetReg =
89 TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_32RegClass);
90 int ScratchOffsetFI = -1;
92 if (ScratchOffsetReg != AMDGPU::NoRegister) {
93 // Found an SGPR to use
94 MRI.setPhysRegUsed(ScratchOffsetReg);
95 BuildMI(*Entry, I, DL, TII->get(AMDGPU::S_MOV_B32), ScratchOffsetReg)
96 .addReg(ScratchOffsetPreloadReg);
97 } else {
98 // No SGPR is available, we must spill.
99 ScratchOffsetFI = FrameInfo->CreateSpillStackObject(4,4);
100 BuildMI(*Entry, I, DL, TII->get(AMDGPU::SI_SPILL_S32_SAVE))
101 .addReg(ScratchOffsetPreloadReg)
102 .addFrameIndex(ScratchOffsetFI);
103 }
106 // Now that we have the scratch pointer and offset values, we need to
107 // add them to all the SI_SPILL_V* instructions.
109 RegScavenger RS;
110 unsigned ScratchRsrcFI = FrameInfo->CreateSpillStackObject(16, 4);
111 RS.addScavengingFrameIndex(ScratchRsrcFI);
113 for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
114 BI != BE; ++BI) {
116 MachineBasicBlock &MBB = *BI;
117 // Add the scratch offset reg as a live-in so that the register scavenger
118 // doesn't re-use it.
119 if (!MBB.isLiveIn(ScratchOffsetReg))
120 MBB.addLiveIn(ScratchOffsetReg);
121 RS.enterBasicBlock(&MBB);
123 for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
124 I != E; ++I) {
125 MachineInstr &MI = *I;
126 RS.forward(I);
127 DebugLoc DL = MI.getDebugLoc();
128 switch(MI.getOpcode()) {
129 default: break;
130 case AMDGPU::SI_SPILL_V512_SAVE:
131 case AMDGPU::SI_SPILL_V256_SAVE:
132 case AMDGPU::SI_SPILL_V128_SAVE:
133 case AMDGPU::SI_SPILL_V96_SAVE:
134 case AMDGPU::SI_SPILL_V64_SAVE:
135 case AMDGPU::SI_SPILL_V32_SAVE:
136 case AMDGPU::SI_SPILL_V32_RESTORE:
137 case AMDGPU::SI_SPILL_V64_RESTORE:
138 case AMDGPU::SI_SPILL_V128_RESTORE:
139 case AMDGPU::SI_SPILL_V256_RESTORE:
140 case AMDGPU::SI_SPILL_V512_RESTORE:
142 // Scratch resource
143 unsigned ScratchRsrcReg =
144 RS.scavengeRegister(&AMDGPU::SReg_128RegClass, 0);
146 uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE |
147 0xffffffff; // Size
149 unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
150 unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
151 unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
152 unsigned Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
154 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc0)
155 .addExternalSymbol("SCRATCH_RSRC_DWORD0")
156 .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
158 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc1)
159 .addExternalSymbol("SCRATCH_RSRC_DWORD1")
160 .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
162 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc2)
163 .addImm(Rsrc & 0xffffffff)
164 .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
166 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc3)
167 .addImm(Rsrc >> 32)
168 .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
170 // Scratch Offset
171 if (ScratchOffsetReg == AMDGPU::NoRegister) {
172 ScratchOffsetReg = RS.scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
173 BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_SPILL_S32_RESTORE),
174 ScratchOffsetReg)
175 .addFrameIndex(ScratchOffsetFI)
176 .addReg(AMDGPU::NoRegister)
177 .addReg(AMDGPU::NoRegister);
178 } else if (!MBB.isLiveIn(ScratchOffsetReg)) {
179 MBB.addLiveIn(ScratchOffsetReg);
180 }
182 if (ScratchRsrcReg == AMDGPU::NoRegister ||
183 ScratchOffsetReg == AMDGPU::NoRegister) {
184 LLVMContext &Ctx = MF.getFunction()->getContext();
185 Ctx.emitError("ran out of SGPRs for spilling VGPRs");
186 ScratchRsrcReg = AMDGPU::SGPR0;
187 ScratchOffsetReg = AMDGPU::SGPR0;
188 }
189 MI.getOperand(2).setReg(ScratchRsrcReg);
190 MI.getOperand(2).setIsKill(true);
191 MI.getOperand(2).setIsUndef(false);
192 MI.getOperand(3).setReg(ScratchOffsetReg);
193 MI.getOperand(3).setIsUndef(false);
194 MI.addOperand(MachineOperand::CreateReg(Rsrc0, false, true, true));
195 MI.addOperand(MachineOperand::CreateReg(Rsrc1, false, true, true));
196 MI.addOperand(MachineOperand::CreateReg(Rsrc2, false, true, true));
197 MI.addOperand(MachineOperand::CreateReg(Rsrc3, false, true, true));
199 break;
200 }
201 }
202 }
203 return true;
204 }