//===- InstCombineLoadStoreAlloca.cpp -------------------------------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file implements the visit functions for load, store and alloca. // //===----------------------------------------------------------------------===// #include "InstCombine.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/Loads.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; #define DEBUG_TYPE "instcombine" STATISTIC(NumDeadStore, "Number of dead stores eliminated"); STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global"); /// pointsToConstantGlobal - Return true if V (possibly indirectly) points to /// some part of a constant global variable. This intentionally only accepts /// constant expressions because we can't rewrite arbitrary instructions. static bool pointsToConstantGlobal(Value *V) { if (GlobalVariable *GV = dyn_cast(V)) return GV->isConstant(); if (ConstantExpr *CE = dyn_cast(V)) { if (CE->getOpcode() == Instruction::BitCast || CE->getOpcode() == Instruction::AddrSpaceCast || CE->getOpcode() == Instruction::GetElementPtr) return pointsToConstantGlobal(CE->getOperand(0)); } return false; } /// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived) /// pointer to an alloca. Ignore any reads of the pointer, return false if we /// see any stores or other unknown uses. If we see pointer arithmetic, keep /// track of whether it moves the pointer (with IsOffset) but otherwise traverse /// the uses. If we see a memcpy/memmove that targets an unoffseted pointer to /// the alloca, and if the source pointer is a pointer to a constant global, we /// can optimize this. static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy, SmallVectorImpl &ToDelete) { // We track lifetime intrinsics as we encounter them. If we decide to go // ahead and replace the value with the global, this lets the caller quickly // eliminate the markers. SmallVector, 35> ValuesToInspect; ValuesToInspect.push_back(std::make_pair(V, false)); while (!ValuesToInspect.empty()) { auto ValuePair = ValuesToInspect.pop_back_val(); const bool IsOffset = ValuePair.second; for (auto &U : ValuePair.first->uses()) { Instruction *I = cast(U.getUser()); if (LoadInst *LI = dyn_cast(I)) { // Ignore non-volatile loads, they are always ok. if (!LI->isSimple()) return false; continue; } if (isa(I) || isa(I)) { // If uses of the bitcast are ok, we are ok. ValuesToInspect.push_back(std::make_pair(I, IsOffset)); continue; } if (GetElementPtrInst *GEP = dyn_cast(I)) { // If the GEP has all zero indices, it doesn't offset the pointer. If it // doesn't, it does. ValuesToInspect.push_back( std::make_pair(I, IsOffset || !GEP->hasAllZeroIndices())); continue; } if (CallSite CS = I) { // If this is the function being called then we treat it like a load and // ignore it. if (CS.isCallee(&U)) continue; // Inalloca arguments are clobbered by the call. unsigned ArgNo = CS.getArgumentNo(&U); if (CS.isInAllocaArgument(ArgNo)) return false; // If this is a readonly/readnone call site, then we know it is just a // load (but one that potentially returns the value itself), so we can // ignore it if we know that the value isn't captured. if (CS.onlyReadsMemory() && (CS.getInstruction()->use_empty() || CS.doesNotCapture(ArgNo))) continue; // If this is being passed as a byval argument, the caller is making a // copy, so it is only a read of the alloca. if (CS.isByValArgument(ArgNo)) continue; } // Lifetime intrinsics can be handled by the caller. if (IntrinsicInst *II = dyn_cast(I)) { if (II->getIntrinsicID() == Intrinsic::lifetime_start || II->getIntrinsicID() == Intrinsic::lifetime_end) { assert(II->use_empty() && "Lifetime markers have no result to use!"); ToDelete.push_back(II); continue; } } // If this is isn't our memcpy/memmove, reject it as something we can't // handle. MemTransferInst *MI = dyn_cast(I); if (!MI) return false; // If the transfer is using the alloca as a source of the transfer, then // ignore it since it is a load (unless the transfer is volatile). if (U.getOperandNo() == 1) { if (MI->isVolatile()) return false; continue; } // If we already have seen a copy, reject the second one. if (TheCopy) return false; // If the pointer has been offset from the start of the alloca, we can't // safely handle this. if (IsOffset) return false; // If the memintrinsic isn't using the alloca as the dest, reject it. if (U.getOperandNo() != 0) return false; // If the source of the memcpy/move is not a constant global, reject it. if (!pointsToConstantGlobal(MI->getSource())) return false; // Otherwise, the transform is safe. Remember the copy instruction. TheCopy = MI; } } return true; } /// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is only /// modified by a copy from a constant global. If we can prove this, we can /// replace any uses of the alloca with uses of the global directly. static MemTransferInst * isOnlyCopiedFromConstantGlobal(AllocaInst *AI, SmallVectorImpl &ToDelete) { MemTransferInst *TheCopy = nullptr; if (isOnlyCopiedFromConstantGlobal(AI, TheCopy, ToDelete)) return TheCopy; return nullptr; } Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { // Ensure that the alloca array size argument has type intptr_t, so that // any casting is exposed early. if (DL) { Type *IntPtrTy = DL->getIntPtrType(AI.getType()); if (AI.getArraySize()->getType() != IntPtrTy) { Value *V = Builder->CreateIntCast(AI.getArraySize(), IntPtrTy, false); AI.setOperand(0, V); return &AI; } } // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1 if (AI.isArrayAllocation()) { // Check C != 1 if (const ConstantInt *C = dyn_cast(AI.getArraySize())) { Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue()); AllocaInst *New = Builder->CreateAlloca(NewTy, nullptr, AI.getName()); New->setAlignment(AI.getAlignment()); // Scan to the end of the allocation instructions, to skip over a block of // allocas if possible...also skip interleaved debug info // BasicBlock::iterator It = New; while (isa(*It) || isa(*It)) ++It; // Now that I is pointing to the first non-allocation-inst in the block, // insert our getelementptr instruction... // Type *IdxTy = DL ? DL->getIntPtrType(AI.getType()) : Type::getInt64Ty(AI.getContext()); Value *NullIdx = Constant::getNullValue(IdxTy); Value *Idx[2] = { NullIdx, NullIdx }; Instruction *GEP = GetElementPtrInst::CreateInBounds(New, Idx, New->getName() + ".sub"); InsertNewInstBefore(GEP, *It); // Now make everything use the getelementptr instead of the original // allocation. return ReplaceInstUsesWith(AI, GEP); } else if (isa(AI.getArraySize())) { return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType())); } } if (DL && AI.getAllocatedType()->isSized()) { // If the alignment is 0 (unspecified), assign it the preferred alignment. if (AI.getAlignment() == 0) AI.setAlignment(DL->getPrefTypeAlignment(AI.getAllocatedType())); // Move all alloca's of zero byte objects to the entry block and merge them // together. Note that we only do this for alloca's, because malloc should // allocate and return a unique pointer, even for a zero byte allocation. if (DL->getTypeAllocSize(AI.getAllocatedType()) == 0) { // For a zero sized alloca there is no point in doing an array allocation. // This is helpful if the array size is a complicated expression not used // elsewhere. if (AI.isArrayAllocation()) { AI.setOperand(0, ConstantInt::get(AI.getArraySize()->getType(), 1)); return &AI; } // Get the first instruction in the entry block. BasicBlock &EntryBlock = AI.getParent()->getParent()->getEntryBlock(); Instruction *FirstInst = EntryBlock.getFirstNonPHIOrDbg(); if (FirstInst != &AI) { // If the entry block doesn't start with a zero-size alloca then move // this one to the start of the entry block. There is no problem with // dominance as the array size was forced to a constant earlier already. AllocaInst *EntryAI = dyn_cast(FirstInst); if (!EntryAI || !EntryAI->getAllocatedType()->isSized() || DL->getTypeAllocSize(EntryAI->getAllocatedType()) != 0) { AI.moveBefore(FirstInst); return &AI; } // If the alignment of the entry block alloca is 0 (unspecified), // assign it the preferred alignment. if (EntryAI->getAlignment() == 0) EntryAI->setAlignment( DL->getPrefTypeAlignment(EntryAI->getAllocatedType())); // Replace this zero-sized alloca with the one at the start of the entry // block after ensuring that the address will be aligned enough for both // types. unsigned MaxAlign = std::max(EntryAI->getAlignment(), AI.getAlignment()); EntryAI->setAlignment(MaxAlign); if (AI.getType() != EntryAI->getType()) return new BitCastInst(EntryAI, AI.getType()); return ReplaceInstUsesWith(AI, EntryAI); } } } if (AI.getAlignment()) { // Check to see if this allocation is only modified by a memcpy/memmove from // a constant global whose alignment is equal to or exceeds that of the // allocation. If this is the case, we can change all users to use // the constant global instead. This is commonly produced by the CFE by // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A' // is only subsequently read. SmallVector ToDelete; if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) { unsigned SourceAlign = getOrEnforceKnownAlignment(Copy->getSource(), AI.getAlignment(), DL, AT, &AI, DT); if (AI.getAlignment() <= SourceAlign) { DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n'); DEBUG(dbgs() << " memcpy = " << *Copy << '\n'); for (unsigned i = 0, e = ToDelete.size(); i != e; ++i) EraseInstFromFunction(*ToDelete[i]); Constant *TheSrc = cast(Copy->getSource()); Constant *Cast = ConstantExpr::getPointerBitCastOrAddrSpaceCast(TheSrc, AI.getType()); Instruction *NewI = ReplaceInstUsesWith(AI, Cast); EraseInstFromFunction(*Copy); ++NumGlobalCopies; return NewI; } } } // At last, use the generic allocation site handler to aggressively remove // unused allocas. return visitAllocSite(AI); } /// \brief Helper to combine a load to a new type. /// /// This just does the work of combining a load to a new type. It handles /// metadata, etc., and returns the new instruction. The \c NewTy should be the /// loaded *value* type. This will convert it to a pointer, cast the operand to /// that pointer type, load it, etc. /// /// Note that this will create all of the instructions with whatever insert /// point the \c InstCombiner currently is using. static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewTy) { Value *Ptr = LI.getPointerOperand(); unsigned AS = LI.getPointerAddressSpace(); SmallVector, 8> MD; LI.getAllMetadata(MD); LoadInst *NewLoad = IC.Builder->CreateAlignedLoad( IC.Builder->CreateBitCast(Ptr, NewTy->getPointerTo(AS)), LI.getAlignment(), LI.getName()); for (const auto &MDPair : MD) { unsigned ID = MDPair.first; MDNode *N = MDPair.second; // Note, essentially every kind of metadata should be preserved here! This // routine is supposed to clone a load instruction changing *only its type*. // The only metadata it makes sense to drop is metadata which is invalidated // when the pointer type changes. This should essentially never be the case // in LLVM, but we explicitly switch over only known metadata to be // conservatively correct. If you are adding metadata to LLVM which pertains // to loads, you almost certainly want to add it here. switch (ID) { case LLVMContext::MD_dbg: case LLVMContext::MD_tbaa: case LLVMContext::MD_prof: case LLVMContext::MD_fpmath: case LLVMContext::MD_tbaa_struct: case LLVMContext::MD_invariant_load: case LLVMContext::MD_alias_scope: case LLVMContext::MD_noalias: case LLVMContext::MD_nontemporal: case LLVMContext::MD_mem_parallel_loop_access: // All of these directly apply. NewLoad->setMetadata(ID, N); break; case LLVMContext::MD_range: // FIXME: It would be nice to propagate this in some way, but the type // conversions make it hard. break; } } return NewLoad; } /// \brief Combine loads to match the type of value their uses after looking /// through intervening bitcasts. /// /// The core idea here is that if the result of a load is used in an operation, /// we should load the type most conducive to that operation. For example, when /// loading an integer and converting that immediately to a pointer, we should /// instead directly load a pointer. /// /// However, this routine must never change the width of a load or the number of /// loads as that would introduce a semantic change. This combine is expected to /// be a semantic no-op which just allows loads to more closely model the types /// of their consuming operations. /// /// Currently, we also refuse to change the precise type used for an atomic load /// or a volatile load. This is debatable, and might be reasonable to change /// later. However, it is risky in case some backend or other part of LLVM is /// relying on the exact type loaded to select appropriate atomic operations. static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) { // FIXME: We could probably with some care handle both volatile and atomic // loads here but it isn't clear that this is important. if (!LI.isSimple()) return nullptr; if (LI.use_empty()) return nullptr; // Fold away bit casts of the loaded value by loading the desired type. if (LI.hasOneUse()) if (auto *BC = dyn_cast(LI.user_back())) { LoadInst *NewLoad = combineLoadToNewType(IC, LI, BC->getDestTy()); BC->replaceAllUsesWith(NewLoad); IC.EraseInstFromFunction(*BC); return &LI; } // FIXME: We should also canonicalize loads of vectors when their elements are // cast to other types. return nullptr; } Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { Value *Op = LI.getOperand(0); // Try to canonicalize the loaded type. if (Instruction *Res = combineLoadToOperationType(*this, LI)) return Res; // Attempt to improve the alignment. if (DL) { unsigned KnownAlign = getOrEnforceKnownAlignment(Op, DL->getPrefTypeAlignment(LI.getType()), DL, AT, &LI, DT); unsigned LoadAlign = LI.getAlignment(); unsigned EffectiveLoadAlign = LoadAlign != 0 ? LoadAlign : DL->getABITypeAlignment(LI.getType()); if (KnownAlign > EffectiveLoadAlign) LI.setAlignment(KnownAlign); else if (LoadAlign == 0) LI.setAlignment(EffectiveLoadAlign); } // None of the following transforms are legal for volatile/atomic loads. // FIXME: Some of it is okay for atomic loads; needs refactoring. if (!LI.isSimple()) return nullptr; // Do really simple store-to-load forwarding and load CSE, to catch cases // where there are several consecutive memory accesses to the same location, // separated by a few arithmetic operations. BasicBlock::iterator BBI = &LI; if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI,6)) return ReplaceInstUsesWith( LI, Builder->CreateBitOrPointerCast(AvailableVal, LI.getType(), LI.getName() + ".cast")); // load(gep null, ...) -> unreachable if (GetElementPtrInst *GEPI = dyn_cast(Op)) { const Value *GEPI0 = GEPI->getOperand(0); // TODO: Consider a target hook for valid address spaces for this xform. if (isa(GEPI0) && GEPI->getPointerAddressSpace() == 0){ // Insert a new store to null instruction before the load to indicate // that this code is not reachable. We do this instead of inserting // an unreachable instruction directly because we cannot modify the // CFG. new StoreInst(UndefValue::get(LI.getType()), Constant::getNullValue(Op->getType()), &LI); return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType())); } } // load null/undef -> unreachable // TODO: Consider a target hook for valid address spaces for this xform. if (isa(Op) || (isa(Op) && LI.getPointerAddressSpace() == 0)) { // Insert a new store to null instruction before the load to indicate that // this code is not reachable. We do this instead of inserting an // unreachable instruction directly because we cannot modify the CFG. new StoreInst(UndefValue::get(LI.getType()), Constant::getNullValue(Op->getType()), &LI); return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType())); } if (Op->hasOneUse()) { // Change select and PHI nodes to select values instead of addresses: this // helps alias analysis out a lot, allows many others simplifications, and // exposes redundancy in the code. // // Note that we cannot do the transformation unless we know that the // introduced loads cannot trap! Something like this is valid as long as // the condition is always false: load (select bool %C, int* null, int* %G), // but it would not be valid if we transformed it to load from null // unconditionally. // if (SelectInst *SI = dyn_cast(Op)) { // load (select (Cond, &V1, &V2)) --> select(Cond, load &V1, load &V2). unsigned Align = LI.getAlignment(); if (isSafeToLoadUnconditionally(SI->getOperand(1), SI, Align, DL) && isSafeToLoadUnconditionally(SI->getOperand(2), SI, Align, DL)) { LoadInst *V1 = Builder->CreateLoad(SI->getOperand(1), SI->getOperand(1)->getName()+".val"); LoadInst *V2 = Builder->CreateLoad(SI->getOperand(2), SI->getOperand(2)->getName()+".val"); V1->setAlignment(Align); V2->setAlignment(Align); return SelectInst::Create(SI->getCondition(), V1, V2); } // load (select (cond, null, P)) -> load P if (Constant *C = dyn_cast(SI->getOperand(1))) if (C->isNullValue()) { LI.setOperand(0, SI->getOperand(2)); return &LI; } // load (select (cond, P, null)) -> load P if (Constant *C = dyn_cast(SI->getOperand(2))) if (C->isNullValue()) { LI.setOperand(0, SI->getOperand(1)); return &LI; } } } return nullptr; } /// InstCombineStoreToCast - Fold store V, (cast P) -> store (cast V), P /// when possible. This makes it generally easy to do alias analysis and/or /// SROA/mem2reg of the memory object. static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { User *CI = cast(SI.getOperand(1)); Value *CastOp = CI->getOperand(0); Type *DestPTy = CI->getType()->getPointerElementType(); PointerType *SrcTy = dyn_cast(CastOp->getType()); if (!SrcTy) return nullptr; Type *SrcPTy = SrcTy->getElementType(); if (!DestPTy->isIntegerTy() && !DestPTy->isPointerTy()) return nullptr; /// NewGEPIndices - If SrcPTy is an aggregate type, we can emit a "noop gep" /// to its first element. This allows us to handle things like: /// store i32 xxx, (bitcast {foo*, float}* %P to i32*) /// on 32-bit hosts. SmallVector NewGEPIndices; // If the source is an array, the code below will not succeed. Check to // see if a trivial 'gep P, 0, 0' will help matters. Only do this for // constants. if (SrcPTy->isArrayTy() || SrcPTy->isStructTy()) { // Index through pointer. Constant *Zero = Constant::getNullValue(Type::getInt32Ty(SI.getContext())); NewGEPIndices.push_back(Zero); while (1) { if (StructType *STy = dyn_cast(SrcPTy)) { if (!STy->getNumElements()) /* Struct can be empty {} */ break; NewGEPIndices.push_back(Zero); SrcPTy = STy->getElementType(0); } else if (ArrayType *ATy = dyn_cast(SrcPTy)) { NewGEPIndices.push_back(Zero); SrcPTy = ATy->getElementType(); } else { break; } } SrcTy = PointerType::get(SrcPTy, SrcTy->getAddressSpace()); } if (!SrcPTy->isIntegerTy() && !SrcPTy->isPointerTy()) return nullptr; // If the pointers point into different address spaces don't do the // transformation. if (SrcTy->getAddressSpace() != CI->getType()->getPointerAddressSpace()) return nullptr; // If the pointers point to values of different sizes don't do the // transformation. if (!IC.getDataLayout() || IC.getDataLayout()->getTypeSizeInBits(SrcPTy) != IC.getDataLayout()->getTypeSizeInBits(DestPTy)) return nullptr; // If the pointers point to pointers to different address spaces don't do the // transformation. It is not safe to introduce an addrspacecast instruction in // this case since, depending on the target, addrspacecast may not be a no-op // cast. if (SrcPTy->isPointerTy() && DestPTy->isPointerTy() && SrcPTy->getPointerAddressSpace() != DestPTy->getPointerAddressSpace()) return nullptr; // Okay, we are casting from one integer or pointer type to another of // the same size. Instead of casting the pointer before // the store, cast the value to be stored. Value *NewCast; Instruction::CastOps opcode = Instruction::BitCast; Type* CastSrcTy = DestPTy; Type* CastDstTy = SrcPTy; if (CastDstTy->isPointerTy()) { if (CastSrcTy->isIntegerTy()) opcode = Instruction::IntToPtr; } else if (CastDstTy->isIntegerTy()) { if (CastSrcTy->isPointerTy()) opcode = Instruction::PtrToInt; } // SIOp0 is a pointer to aggregate and this is a store to the first field, // emit a GEP to index into its first field. if (!NewGEPIndices.empty()) CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices); Value *SIOp0 = SI.getOperand(0); NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy, SIOp0->getName()+".c"); SI.setOperand(0, NewCast); SI.setOperand(1, CastOp); return &SI; } /// equivalentAddressValues - Test if A and B will obviously have the same /// value. This includes recognizing that %t0 and %t1 will have the same /// value in code like this: /// %t0 = getelementptr \@a, 0, 3 /// store i32 0, i32* %t0 /// %t1 = getelementptr \@a, 0, 3 /// %t2 = load i32* %t1 /// static bool equivalentAddressValues(Value *A, Value *B) { // Test if the values are trivially equivalent. if (A == B) return true; // Test if the values come form identical arithmetic instructions. // This uses isIdenticalToWhenDefined instead of isIdenticalTo because // its only used to compare two uses within the same basic block, which // means that they'll always either have the same value or one of them // will have an undefined value. if (isa(A) || isa(A) || isa(A) || isa(A)) if (Instruction *BI = dyn_cast(B)) if (cast(A)->isIdenticalToWhenDefined(BI)) return true; // Otherwise they may not be equivalent. return false; } Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { Value *Val = SI.getOperand(0); Value *Ptr = SI.getOperand(1); // Attempt to improve the alignment. if (DL) { unsigned KnownAlign = getOrEnforceKnownAlignment(Ptr, DL->getPrefTypeAlignment(Val->getType()), DL, AT, &SI, DT); unsigned StoreAlign = SI.getAlignment(); unsigned EffectiveStoreAlign = StoreAlign != 0 ? StoreAlign : DL->getABITypeAlignment(Val->getType()); if (KnownAlign > EffectiveStoreAlign) SI.setAlignment(KnownAlign); else if (StoreAlign == 0) SI.setAlignment(EffectiveStoreAlign); } // Don't hack volatile/atomic stores. // FIXME: Some bits are legal for atomic stores; needs refactoring. if (!SI.isSimple()) return nullptr; // If the RHS is an alloca with a single use, zapify the store, making the // alloca dead. if (Ptr->hasOneUse()) { if (isa(Ptr)) return EraseInstFromFunction(SI); if (GetElementPtrInst *GEP = dyn_cast(Ptr)) { if (isa(GEP->getOperand(0))) { if (GEP->getOperand(0)->hasOneUse()) return EraseInstFromFunction(SI); } } } // Do really simple DSE, to catch cases where there are several consecutive // stores to the same location, separated by a few arithmetic operations. This // situation often occurs with bitfield accesses. BasicBlock::iterator BBI = &SI; for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts; --ScanInsts) { --BBI; // Don't count debug info directives, lest they affect codegen, // and we skip pointer-to-pointer bitcasts, which are NOPs. if (isa(BBI) || (isa(BBI) && BBI->getType()->isPointerTy())) { ScanInsts++; continue; } if (StoreInst *PrevSI = dyn_cast(BBI)) { // Prev store isn't volatile, and stores to the same location? if (PrevSI->isSimple() && equivalentAddressValues(PrevSI->getOperand(1), SI.getOperand(1))) { ++NumDeadStore; ++BBI; EraseInstFromFunction(*PrevSI); continue; } break; } // If this is a load, we have to stop. However, if the loaded value is from // the pointer we're loading and is producing the pointer we're storing, // then *this* store is dead (X = load P; store X -> P). if (LoadInst *LI = dyn_cast(BBI)) { if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) && LI->isSimple()) return EraseInstFromFunction(SI); // Otherwise, this is a load from some other location. Stores before it // may not be dead. break; } // Don't skip over loads or things that can modify memory. if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory()) break; } // store X, null -> turns into 'unreachable' in SimplifyCFG if (isa(Ptr) && SI.getPointerAddressSpace() == 0) { if (!isa(Val)) { SI.setOperand(0, UndefValue::get(Val->getType())); if (Instruction *U = dyn_cast(Val)) Worklist.Add(U); // Dropped a use. } return nullptr; // Do not modify these! } // store undef, Ptr -> noop if (isa(Val)) return EraseInstFromFunction(SI); // If the pointer destination is a cast, see if we can fold the cast into the // source instead. if (isa(Ptr)) if (Instruction *Res = InstCombineStoreToCast(*this, SI)) return Res; if (ConstantExpr *CE = dyn_cast(Ptr)) if (CE->isCast()) if (Instruction *Res = InstCombineStoreToCast(*this, SI)) return Res; // If this store is the last instruction in the basic block (possibly // excepting debug info instructions), and if the block ends with an // unconditional branch, try to move it to the successor block. BBI = &SI; do { ++BBI; } while (isa(BBI) || (isa(BBI) && BBI->getType()->isPointerTy())); if (BranchInst *BI = dyn_cast(BBI)) if (BI->isUnconditional()) if (SimplifyStoreAtEndOfBlock(SI)) return nullptr; // xform done! return nullptr; } /// SimplifyStoreAtEndOfBlock - Turn things like: /// if () { *P = v1; } else { *P = v2 } /// into a phi node with a store in the successor. /// /// Simplify things like: /// *P = v1; if () { *P = v2; } /// into a phi node with a store in the successor. /// bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { BasicBlock *StoreBB = SI.getParent(); // Check to see if the successor block has exactly two incoming edges. If // so, see if the other predecessor contains a store to the same location. // if so, insert a PHI node (if needed) and move the stores down. BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0); // Determine whether Dest has exactly two predecessors and, if so, compute // the other predecessor. pred_iterator PI = pred_begin(DestBB); BasicBlock *P = *PI; BasicBlock *OtherBB = nullptr; if (P != StoreBB) OtherBB = P; if (++PI == pred_end(DestBB)) return false; P = *PI; if (P != StoreBB) { if (OtherBB) return false; OtherBB = P; } if (++PI != pred_end(DestBB)) return false; // Bail out if all the relevant blocks aren't distinct (this can happen, // for example, if SI is in an infinite loop) if (StoreBB == DestBB || OtherBB == DestBB) return false; // Verify that the other block ends in a branch and is not otherwise empty. BasicBlock::iterator BBI = OtherBB->getTerminator(); BranchInst *OtherBr = dyn_cast(BBI); if (!OtherBr || BBI == OtherBB->begin()) return false; // If the other block ends in an unconditional branch, check for the 'if then // else' case. there is an instruction before the branch. StoreInst *OtherStore = nullptr; if (OtherBr->isUnconditional()) { --BBI; // Skip over debugging info. while (isa(BBI) || (isa(BBI) && BBI->getType()->isPointerTy())) { if (BBI==OtherBB->begin()) return false; --BBI; } // If this isn't a store, isn't a store to the same location, or is not the // right kind of store, bail out. OtherStore = dyn_cast(BBI); if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) || !SI.isSameOperationAs(OtherStore)) return false; } else { // Otherwise, the other block ended with a conditional branch. If one of the // destinations is StoreBB, then we have the if/then case. if (OtherBr->getSuccessor(0) != StoreBB && OtherBr->getSuccessor(1) != StoreBB) return false; // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an // if/then triangle. See if there is a store to the same ptr as SI that // lives in OtherBB. for (;; --BBI) { // Check to see if we find the matching store. if ((OtherStore = dyn_cast(BBI))) { if (OtherStore->getOperand(1) != SI.getOperand(1) || !SI.isSameOperationAs(OtherStore)) return false; break; } // If we find something that may be using or overwriting the stored // value, or if we run out of instructions, we can't do the xform. if (BBI->mayReadFromMemory() || BBI->mayWriteToMemory() || BBI == OtherBB->begin()) return false; } // In order to eliminate the store in OtherBr, we have to // make sure nothing reads or overwrites the stored value in // StoreBB. for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) { // FIXME: This should really be AA driven. if (I->mayReadFromMemory() || I->mayWriteToMemory()) return false; } } // Insert a PHI node now if we need it. Value *MergedVal = OtherStore->getOperand(0); if (MergedVal != SI.getOperand(0)) { PHINode *PN = PHINode::Create(MergedVal->getType(), 2, "storemerge"); PN->addIncoming(SI.getOperand(0), SI.getParent()); PN->addIncoming(OtherStore->getOperand(0), OtherBB); MergedVal = InsertNewInstBefore(PN, DestBB->front()); } // Advance to a place where it is safe to insert the new store and // insert it. BBI = DestBB->getFirstInsertionPt(); StoreInst *NewSI = new StoreInst(MergedVal, SI.getOperand(1), SI.isVolatile(), SI.getAlignment(), SI.getOrdering(), SI.getSynchScope()); InsertNewInstBefore(NewSI, *BBI); NewSI->setDebugLoc(OtherStore->getDebugLoc()); // If the two stores had AA tags, merge them. AAMDNodes AATags; SI.getAAMetadata(AATags); if (AATags) { OtherStore->getAAMetadata(AATags, /* Merge = */ true); NewSI->setAAMetadata(AATags); } // Nuke the old stores. EraseInstFromFunction(SI); EraseInstFromFunction(*OtherStore); return true; }