From cc331c8f40107934b4bfa36d3646126bc0c4a412 Mon Sep 17 00:00:00 2001 From: Richard Osborne Date: Thu, 27 Feb 2014 17:47:54 +0000 Subject: [PATCH 1/1] [XCore] Support functions returning more than 4 words. If a function returns a large struct by value return the first 4 words in registers and the rest on the stack in a location reserved by the caller. This is needed to support the xC language which supports functions returning an arbitrary number of return values. This is r202397 reapplied with a fix to avoid an uninitialized read of a member. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@202414 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/XCore/XCoreCallingConv.td | 6 +- lib/Target/XCore/XCoreISelLowering.cpp | 113 ++++++++++++++++---- lib/Target/XCore/XCoreISelLowering.h | 3 + lib/Target/XCore/XCoreInstrInfo.td | 10 +- lib/Target/XCore/XCoreMachineFunctionInfo.h | 15 +++ test/CodeGen/XCore/bigstructret.ll | 39 ++++++- 6 files changed, 160 insertions(+), 26 deletions(-) diff --git a/lib/Target/XCore/XCoreCallingConv.td b/lib/Target/XCore/XCoreCallingConv.td index b20d71f49c..e149e6d9ec 100644 --- a/lib/Target/XCore/XCoreCallingConv.td +++ b/lib/Target/XCore/XCoreCallingConv.td @@ -14,7 +14,11 @@ //===----------------------------------------------------------------------===// def RetCC_XCore : CallingConv<[ // i32 are returned in registers R0, R1, R2, R3 - CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>> + CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>, + + // Integer values get stored in stack slots that are 4 bytes in + // size and 4-byte aligned. 
+ CCIfType<[i32], CCAssignToStack<4, 4>> ]>; //===----------------------------------------------------------------------===// diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index 765479fa32..67dc19b0aa 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -50,6 +50,7 @@ getTargetNodeName(unsigned Opcode) const case XCoreISD::PCRelativeWrapper : return "XCoreISD::PCRelativeWrapper"; case XCoreISD::DPRelativeWrapper : return "XCoreISD::DPRelativeWrapper"; case XCoreISD::CPRelativeWrapper : return "XCoreISD::CPRelativeWrapper"; + case XCoreISD::LDWSP : return "XCoreISD::LDWSP"; case XCoreISD::STWSP : return "XCoreISD::STWSP"; case XCoreISD::RETSP : return "XCoreISD::RETSP"; case XCoreISD::LADD : return "XCoreISD::LADD"; @@ -1085,14 +1086,42 @@ LowerCallResult(SDValue Chain, SDValue InFlag, const SmallVectorImpl &RVLocs, SDLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) { - // Copy all of the result registers out of their specified physreg. - for (unsigned i = 0; i != RVLocs.size(); ++i) { - Chain = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(), - RVLocs[i].getValVT(), InFlag).getValue(1); - InFlag = Chain.getValue(2); - InVals.push_back(Chain.getValue(0)); + SmallVector, 4> ResultMemLocs; + // Copy results out of physical registers. + for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { + const CCValAssign &VA = RVLocs[i]; + if (VA.isRegLoc()) { + Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getValVT(), + InFlag).getValue(1); + InFlag = Chain.getValue(2); + InVals.push_back(Chain.getValue(0)); + } else { + assert(VA.isMemLoc()); + ResultMemLocs.push_back(std::make_pair(VA.getLocMemOffset(), + InVals.size())); + // Reserve space for this result. + InVals.push_back(SDValue()); + } } + // Copy results out of memory. 
+ SmallVector MemOpChains; + for (unsigned i = 0, e = ResultMemLocs.size(); i != e; ++i) { + int offset = ResultMemLocs[i].first; + unsigned index = ResultMemLocs[i].second; + SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other); + SDValue Ops[] = { Chain, DAG.getConstant(offset / 4, MVT::i32) }; + SDValue load = DAG.getNode(XCoreISD::LDWSP, dl, VTs, Ops, 2); + InVals[index] = load; + MemOpChains.push_back(load.getValue(1)); + } + + // Transform all load nodes into one single node because + // all load nodes are independent of each other. + if (!MemOpChains.empty()) + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &MemOpChains[0], MemOpChains.size()); + return Chain; } @@ -1121,8 +1150,15 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, CCInfo.AnalyzeCallOperands(Outs, CC_XCore); + SmallVector RVLocs; + // Analyze return values to determine the number of bytes of stack required. + CCState RetCCInfo(CallConv, isVarArg, DAG.getMachineFunction(), + getTargetMachine(), RVLocs, *DAG.getContext()); + RetCCInfo.AllocateStack(CCInfo.getNextStackOffset(), 4); + RetCCInfo.AnalyzeCallResult(Ins, RetCC_XCore); + // Get a count of how many bytes are to be pushed on the stack. - unsigned NumBytes = CCInfo.getNextStackOffset(); + unsigned NumBytes = RetCCInfo.getNextStackOffset(); Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy(), true), dl); @@ -1218,12 +1254,6 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, InFlag, dl); InFlag = Chain.getValue(1); - // Assign locations to each value returned by this call. - SmallVector RVLocs; - CCState RetCCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), RVLocs, *DAG.getContext()); - RetCCInfo.AnalyzeCallResult(Ins, RetCC_XCore); - // Handle result values, copying them out of physregs into vregs that we // return. 
return LowerCallResult(Chain, InFlag, RVLocs, dl, DAG, InVals); @@ -1274,6 +1304,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); MachineRegisterInfo &RegInfo = MF.getRegInfo(); + XCoreFunctionInfo *XFI = MF.getInfo(); // Assign locations to all of the incoming arguments. SmallVector ArgLocs; @@ -1286,6 +1317,9 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, unsigned LRSaveSize = StackSlotSize; + if (!isVarArg) + XFI->setReturnStackOffset(CCInfo.getNextStackOffset() + LRSaveSize); + // All getCopyFromReg ops must precede any getMemcpys to prevent the // scheduler clobbering a register before it has been copied. // The stages are: @@ -1436,7 +1470,11 @@ CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, LLVMContext &Context) const { SmallVector RVLocs; CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), RVLocs, Context); - return CCInfo.CheckReturn(Outs, RetCC_XCore); + if (!CCInfo.CheckReturn(Outs, RetCC_XCore)) + return false; + if (CCInfo.getNextStackOffset() != 0 && isVarArg) + return false; + return true; } SDValue @@ -1446,6 +1484,10 @@ XCoreTargetLowering::LowerReturn(SDValue Chain, const SmallVectorImpl &OutVals, SDLoc dl, SelectionDAG &DAG) const { + XCoreFunctionInfo *XFI = + DAG.getMachineFunction().getInfo(); + MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + // CCValAssign - represent the assignment of // the return value to a location SmallVector RVLocs; @@ -1455,6 +1497,9 @@ XCoreTargetLowering::LowerReturn(SDValue Chain, getTargetMachine(), RVLocs, *DAG.getContext()); // Analyze return values. 
+ if (!isVarArg) + CCInfo.AllocateStack(XFI->getReturnStackOffset(), 4); + CCInfo.AnalyzeReturn(Outs, RetCC_XCore); SDValue Flag; @@ -1463,13 +1508,43 @@ XCoreTargetLowering::LowerReturn(SDValue Chain, // Return on XCore is always a "retsp 0" RetOps.push_back(DAG.getConstant(0, MVT::i32)); - // Copy the result values into the output registers. - for (unsigned i = 0; i != RVLocs.size(); ++i) { + SmallVector MemOpChains; + // Handle return values that must be copied to memory. + for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { CCValAssign &VA = RVLocs[i]; - assert(VA.isRegLoc() && "Can only return in registers!"); + if (VA.isRegLoc()) + continue; + assert(VA.isMemLoc()); + if (isVarArg) { + report_fatal_error("Can't return value from vararg function in memory"); + } - Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), - OutVals[i], Flag); + int Offset = VA.getLocMemOffset(); + unsigned ObjSize = VA.getLocVT().getSizeInBits() / 8; + // Create the frame index object for the memory location. + int FI = MFI->CreateFixedObject(ObjSize, Offset, false); + + // Create a SelectionDAG node corresponding to a store + // to this memory location. + SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); + MemOpChains.push_back(DAG.getStore(Chain, dl, OutVals[i], FIN, + MachinePointerInfo::getFixedStack(FI), false, false, + 0)); + } + + // Transform all store nodes into one single node because + // all stores are independent of each other. + if (!MemOpChains.empty()) + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &MemOpChains[0], MemOpChains.size()); + + // Now handle return values copied to registers. + for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { + CCValAssign &VA = RVLocs[i]; + if (!VA.isRegLoc()) + continue; + // Copy the result values into the output registers. 
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), OutVals[i], Flag); // guarantee that all emitted copies are // stuck together, avoiding something bad diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h index afffcea790..65e2bad4f0 100644 --- a/lib/Target/XCore/XCoreISelLowering.h +++ b/lib/Target/XCore/XCoreISelLowering.h @@ -42,6 +42,9 @@ namespace llvm { // cp relative address CPRelativeWrapper, + // Load word from stack + LDWSP, + // Store word to stack STWSP, diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td index b243ee3510..b6906569aa 100644 --- a/lib/Target/XCore/XCoreInstrInfo.td +++ b/lib/Target/XCore/XCoreInstrInfo.td @@ -68,6 +68,10 @@ def SDT_XCoreStwsp : SDTypeProfile<0, 2, [SDTCisInt<1>]>; def XCoreStwsp : SDNode<"XCoreISD::STWSP", SDT_XCoreStwsp, [SDNPHasChain, SDNPMayStore]>; +def SDT_XCoreLdwsp : SDTypeProfile<1, 1, [SDTCisInt<1>]>; +def XCoreLdwsp : SDNode<"XCoreISD::LDWSP", SDT_XCoreLdwsp, + [SDNPHasChain, SDNPMayLoad]>; + // These are target-independent nodes, but have target-specific formats. 
def SDT_XCoreCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>; def SDT_XCoreCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>, @@ -581,10 +585,12 @@ def STWSP_lru6 : _FLRU6<0b010101, (outs), (ins RRegs:$a, i32imm:$b), let mayLoad=1 in { def LDWSP_ru6 : _FRU6<0b010111, (outs RRegs:$a), (ins i32imm:$b), - "ldw $a, sp[$b]", []>; + "ldw $a, sp[$b]", + [(set RRegs:$a, (XCoreLdwsp immU6:$b))]>; def LDWSP_lru6 : _FLRU6<0b010111, (outs RRegs:$a), (ins i32imm:$b), - "ldw $a, sp[$b]", []>; + "ldw $a, sp[$b]", + [(set RRegs:$a, (XCoreLdwsp immU16:$b))]>; } let neverHasSideEffects = 1 in { diff --git a/lib/Target/XCore/XCoreMachineFunctionInfo.h b/lib/Target/XCore/XCoreMachineFunctionInfo.h index f1777a87e1..afbec65285 100644 --- a/lib/Target/XCore/XCoreMachineFunctionInfo.h +++ b/lib/Target/XCore/XCoreMachineFunctionInfo.h @@ -33,6 +33,8 @@ class XCoreFunctionInfo : public MachineFunctionInfo { int FPSpillSlot; bool EHSpillSlotSet; int EHSpillSlot[2]; + unsigned ReturnStackOffset; + bool ReturnStackOffsetSet; int VarArgsFrameIndex; mutable int CachedEStackSize; std::vector > SpillLabels; @@ -42,6 +44,7 @@ public: LRSpillSlotSet(false), FPSpillSlotSet(false), EHSpillSlotSet(false), + ReturnStackOffsetSet(false), VarArgsFrameIndex(0), CachedEStackSize(-1) {} @@ -49,6 +52,7 @@ public: LRSpillSlotSet(false), FPSpillSlotSet(false), EHSpillSlotSet(false), + ReturnStackOffsetSet(false), VarArgsFrameIndex(0), CachedEStackSize(-1) {} @@ -78,6 +82,17 @@ public: return EHSpillSlot; } + void setReturnStackOffset(unsigned value) { + assert(!ReturnStackOffsetSet && "Return stack offset set twice"); + ReturnStackOffset = value; + ReturnStackOffsetSet = true; + } + + unsigned getReturnStackOffset() const { + assert(ReturnStackOffsetSet && "Return stack offset not set"); + return ReturnStackOffset; + } + bool isLargeFrame(const MachineFunction &MF) const; std::vector > &getSpillLabels() { diff --git a/test/CodeGen/XCore/bigstructret.ll b/test/CodeGen/XCore/bigstructret.ll index 
877c57140a..567b372091 100644 --- a/test/CodeGen/XCore/bigstructret.ll +++ b/test/CodeGen/XCore/bigstructret.ll @@ -3,8 +3,8 @@ %0 = type { i32, i32, i32, i32 } %1 = type { i32, i32, i32, i32, i32 } -; Structs of 4 words can be returned in registers -define internal fastcc %0 @ReturnBigStruct() nounwind readnone { +; Structs of 4 words are returned in registers +define internal %0 @ReturnBigStruct() nounwind readnone { entry: %0 = insertvalue %0 zeroinitializer, i32 12, 0 %1 = insertvalue %0 %0, i32 24, 1 @@ -19,8 +19,39 @@ entry: ; CHECK: ldc r3, 24601 ; CHECK: retsp 0 -; Structs bigger than 4 words are returned via a hidden hidden sret-parameter -define internal fastcc %1 @ReturnBigStruct2() nounwind readnone { +; Structs of more than 4 words are partially returned in memory so long as the +; function is not variadic. +define { i32, i32, i32, i32, i32} @f(i32, i32, i32, i32, i32) nounwind readnone { +; CHECK-LABEL: f: +; CHECK: ldc [[REGISTER:r[0-9]+]], 5 +; CHECK-NEXT: stw [[REGISTER]], sp[2] +; CHECK-NEXT: retsp 0 +body: + ret { i32, i32, i32, i32, i32} { i32 undef, i32 undef, i32 undef, i32 undef, i32 5} +} + +@x = external global i32 +@y = external global i32 + +; Check we call a function returning more than 4 words correctly. +define i32 @g() nounwind { +; CHECK-LABEL: g: +; CHECK: entsp 3 +; CHECK: ldc [[REGISTER:r[0-9]+]], 0 +; CHECK: stw [[REGISTER]], sp[1] +; CHECK: bl f +; CHECK-NEXT: ldw r0, sp[2] +; CHECK-NEXT: retsp 3 +; +body: + %0 = call { i32, i32, i32, i32, i32 } @f(i32 0, i32 0, i32 0, i32 0, i32 0) + %1 = extractvalue { i32, i32, i32, i32, i32 } %0, 4 + ret i32 %1 +} + +; Variadic functions return structs bigger than 4 words via a hidden +; sret-parameter +define internal %1 @ReturnBigStruct2(i32 %dummy, ...) nounwind readnone { entry: %0 = insertvalue %1 zeroinitializer, i32 12, 0 %1 = insertvalue %1 %0, i32 24, 1 -- 2.39.2