6bd38ae6165720a5862d46b9f9e3763efd38af6c
1 //===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
11 // both before and after the DAG is legalized.
12 //
13 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
14 // primarily intended to handle simplification opportunities that are implicit
15 // in the LLVM IR and exposed by the various codegen lowering phases.
16 //
17 //===----------------------------------------------------------------------===//
19 #include "llvm/CodeGen/SelectionDAG.h"
20 #include "llvm/ADT/SetVector.h"
21 #include "llvm/ADT/SmallBitVector.h"
22 #include "llvm/ADT/SmallPtrSet.h"
23 #include "llvm/ADT/Statistic.h"
24 #include "llvm/Analysis/AliasAnalysis.h"
25 #include "llvm/CodeGen/MachineFrameInfo.h"
26 #include "llvm/CodeGen/MachineFunction.h"
27 #include "llvm/IR/DataLayout.h"
28 #include "llvm/IR/DerivedTypes.h"
29 #include "llvm/IR/Function.h"
30 #include "llvm/IR/LLVMContext.h"
31 #include "llvm/Support/CommandLine.h"
32 #include "llvm/Support/Debug.h"
33 #include "llvm/Support/ErrorHandling.h"
34 #include "llvm/Support/MathExtras.h"
35 #include "llvm/Support/raw_ostream.h"
36 #include "llvm/Target/TargetLowering.h"
37 #include "llvm/Target/TargetOptions.h"
38 #include "llvm/Target/TargetRegisterInfo.h"
39 #include "llvm/Target/TargetSubtargetInfo.h"
40 #include <algorithm>
41 using namespace llvm;
43 #define DEBUG_TYPE "dagcombine"
45 STATISTIC(NodesCombined , "Number of dag nodes combined");
46 STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
47 STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
48 STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
49 STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
50 STATISTIC(SlicedLoads, "Number of load sliced");
52 namespace {
53 static cl::opt<bool>
54 CombinerAA("combiner-alias-analysis", cl::Hidden,
55 cl::desc("Enable DAG combiner alias-analysis heuristics"));
57 static cl::opt<bool>
58 CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
59 cl::desc("Enable DAG combiner's use of IR alias analysis"));
61 static cl::opt<bool>
62 UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
63 cl::desc("Enable DAG combiner's use of TBAA"));
65 #ifndef NDEBUG
66 static cl::opt<std::string>
67 CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
68 cl::desc("Only use DAG-combiner alias analysis in this"
69 " function"));
70 #endif
72 /// Hidden option to stress test load slicing, i.e., when this option
73 /// is enabled, load slicing bypasses most of its profitability guards.
74 static cl::opt<bool>
75 StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
76 cl::desc("Bypass the profitability model of load "
77 "slicing"),
78 cl::init(false));
80 static cl::opt<bool>
81 MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
82 cl::desc("DAG combiner may split indexing from loads"));
84 //------------------------------ DAGCombiner ---------------------------------//
86 class DAGCombiner {
87 SelectionDAG &DAG;
88 const TargetLowering &TLI;
89 CombineLevel Level;
90 CodeGenOpt::Level OptLevel;
91 bool LegalOperations;
92 bool LegalTypes;
93 bool ForCodeSize;
95 /// \brief Worklist of all of the nodes that need to be simplified.
96 ///
97 /// This must behave as a stack -- new nodes to process are pushed onto the
98 /// back and when processing we pop off of the back.
99 ///
100 /// The worklist will not contain duplicates but may contain null entries
101 /// due to nodes being deleted from the underlying DAG.
102 SmallVector<SDNode *, 64> Worklist;
104 /// \brief Mapping from an SDNode to its position on the worklist.
105 ///
106 /// This is used to find and remove nodes from the worklist (by nulling
107 /// them) when they are deleted from the underlying DAG. It relies on
108 /// stable indices of nodes within the worklist.
109 DenseMap<SDNode *, unsigned> WorklistMap;
111 /// \brief Set of nodes which have been combined (at least once).
112 ///
113 /// This is used to allow us to reliably add any operands of a DAG node
114 /// which have not yet been combined to the worklist.
115 SmallPtrSet<SDNode *, 64> CombinedNodes;
117 // AA - Used for DAG load/store alias analysis.
118 AliasAnalysis &AA;
120 /// When an instruction is simplified, add all users of the instruction to
121 /// the work lists because they might get more simplified now.
122 void AddUsersToWorklist(SDNode *N) {
123 for (SDNode *Node : N->uses())
124 AddToWorklist(Node);
125 }
127 /// Call the node-specific routine that folds each particular type of node.
128 SDValue visit(SDNode *N);
130 public:
131 /// Add to the worklist making sure its instance is at the back (next to be
132 /// processed.)
133 void AddToWorklist(SDNode *N) {
134 // Skip handle nodes as they can't usefully be combined and confuse the
135 // zero-use deletion strategy.
136 if (N->getOpcode() == ISD::HANDLENODE)
137 return;
139 if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
140 Worklist.push_back(N);
141 }
143 /// Remove all instances of N from the worklist.
144 void removeFromWorklist(SDNode *N) {
145 CombinedNodes.erase(N);
147 auto It = WorklistMap.find(N);
148 if (It == WorklistMap.end())
149 return; // Not in the worklist.
151 // Null out the entry rather than erasing it to avoid a linear operation.
152 Worklist[It->second] = nullptr;
153 WorklistMap.erase(It);
154 }
156 void deleteAndRecombine(SDNode *N);
157 bool recursivelyDeleteUnusedNodes(SDNode *N);
159 SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
160 bool AddTo = true);
162 SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
163 return CombineTo(N, &Res, 1, AddTo);
164 }
166 SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
167 bool AddTo = true) {
168 SDValue To[] = { Res0, Res1 };
169 return CombineTo(N, To, 2, AddTo);
170 }
172 void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
174 private:
176 /// Check the specified integer node value to see if it can be simplified or
177 /// if things it uses can be simplified by bit propagation.
178 /// If so, return true.
179 bool SimplifyDemandedBits(SDValue Op) {
180 unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
181 APInt Demanded = APInt::getAllOnesValue(BitWidth);
182 return SimplifyDemandedBits(Op, Demanded);
183 }
185 bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
187 bool CombineToPreIndexedLoadStore(SDNode *N);
188 bool CombineToPostIndexedLoadStore(SDNode *N);
189 SDValue SplitIndexingFromLoad(LoadSDNode *LD);
190 bool SliceUpLoad(SDNode *N);
192 /// \brief Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
193 /// load.
194 ///
195 /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
196 /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
197 /// \param EltNo index of the vector element to load.
198 /// \param OriginalLoad load that EVE came from to be replaced.
199 /// \returns EVE on success SDValue() on failure.
200 SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
201 SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad);
202 void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
203 SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
204 SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
205 SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
206 SDValue PromoteIntBinOp(SDValue Op);
207 SDValue PromoteIntShiftOp(SDValue Op);
208 SDValue PromoteExtend(SDValue Op);
209 bool PromoteLoad(SDValue Op);
211 void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
212 SDValue Trunc, SDValue ExtLoad, SDLoc DL,
213 ISD::NodeType ExtType);
215 /// Call the node-specific routine that knows how to fold each
216 /// particular type of node. If that doesn't do anything, try the
217 /// target-specific DAG combines.
218 SDValue combine(SDNode *N);
220 // Visitation implementation - Implement dag node combining for different
221 // node types. The semantics are as follows:
222 // Return Value:
223 // SDValue.getNode() == 0 - No change was made
224 // SDValue.getNode() == N - N was replaced, is dead and has been handled.
225 // otherwise - N should be replaced by the returned Operand.
226 //
227 SDValue visitTokenFactor(SDNode *N);
228 SDValue visitMERGE_VALUES(SDNode *N);
229 SDValue visitADD(SDNode *N);
230 SDValue visitSUB(SDNode *N);
231 SDValue visitADDC(SDNode *N);
232 SDValue visitSUBC(SDNode *N);
233 SDValue visitADDE(SDNode *N);
234 SDValue visitSUBE(SDNode *N);
235 SDValue visitMUL(SDNode *N);
236 SDValue visitSDIV(SDNode *N);
237 SDValue visitUDIV(SDNode *N);
238 SDValue visitSREM(SDNode *N);
239 SDValue visitUREM(SDNode *N);
240 SDValue visitMULHU(SDNode *N);
241 SDValue visitMULHS(SDNode *N);
242 SDValue visitSMUL_LOHI(SDNode *N);
243 SDValue visitUMUL_LOHI(SDNode *N);
244 SDValue visitSMULO(SDNode *N);
245 SDValue visitUMULO(SDNode *N);
246 SDValue visitSDIVREM(SDNode *N);
247 SDValue visitUDIVREM(SDNode *N);
248 SDValue visitAND(SDNode *N);
249 SDValue visitOR(SDNode *N);
250 SDValue visitXOR(SDNode *N);
251 SDValue SimplifyVBinOp(SDNode *N);
252 SDValue SimplifyVUnaryOp(SDNode *N);
253 SDValue visitSHL(SDNode *N);
254 SDValue visitSRA(SDNode *N);
255 SDValue visitSRL(SDNode *N);
256 SDValue visitRotate(SDNode *N);
257 SDValue visitCTLZ(SDNode *N);
258 SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
259 SDValue visitCTTZ(SDNode *N);
260 SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
261 SDValue visitCTPOP(SDNode *N);
262 SDValue visitSELECT(SDNode *N);
263 SDValue visitVSELECT(SDNode *N);
264 SDValue visitSELECT_CC(SDNode *N);
265 SDValue visitSETCC(SDNode *N);
266 SDValue visitSIGN_EXTEND(SDNode *N);
267 SDValue visitZERO_EXTEND(SDNode *N);
268 SDValue visitANY_EXTEND(SDNode *N);
269 SDValue visitSIGN_EXTEND_INREG(SDNode *N);
270 SDValue visitTRUNCATE(SDNode *N);
271 SDValue visitBITCAST(SDNode *N);
272 SDValue visitBUILD_PAIR(SDNode *N);
273 SDValue visitFADD(SDNode *N);
274 SDValue visitFSUB(SDNode *N);
275 SDValue visitFMUL(SDNode *N);
276 SDValue visitFMA(SDNode *N);
277 SDValue visitFDIV(SDNode *N);
278 SDValue visitFREM(SDNode *N);
279 SDValue visitFSQRT(SDNode *N);
280 SDValue visitFCOPYSIGN(SDNode *N);
281 SDValue visitSINT_TO_FP(SDNode *N);
282 SDValue visitUINT_TO_FP(SDNode *N);
283 SDValue visitFP_TO_SINT(SDNode *N);
284 SDValue visitFP_TO_UINT(SDNode *N);
285 SDValue visitFP_ROUND(SDNode *N);
286 SDValue visitFP_ROUND_INREG(SDNode *N);
287 SDValue visitFP_EXTEND(SDNode *N);
288 SDValue visitFNEG(SDNode *N);
289 SDValue visitFABS(SDNode *N);
290 SDValue visitFCEIL(SDNode *N);
291 SDValue visitFTRUNC(SDNode *N);
292 SDValue visitFFLOOR(SDNode *N);
293 SDValue visitFMINNUM(SDNode *N);
294 SDValue visitFMAXNUM(SDNode *N);
295 SDValue visitBRCOND(SDNode *N);
296 SDValue visitBR_CC(SDNode *N);
297 SDValue visitLOAD(SDNode *N);
298 SDValue visitSTORE(SDNode *N);
299 SDValue visitINSERT_VECTOR_ELT(SDNode *N);
300 SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
301 SDValue visitBUILD_VECTOR(SDNode *N);
302 SDValue visitCONCAT_VECTORS(SDNode *N);
303 SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
304 SDValue visitVECTOR_SHUFFLE(SDNode *N);
305 SDValue visitINSERT_SUBVECTOR(SDNode *N);
306 SDValue visitMLOAD(SDNode *N);
307 SDValue visitMSTORE(SDNode *N);
309 SDValue XformToShuffleWithZero(SDNode *N);
310 SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS);
312 SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
314 bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
315 SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
316 SDValue SimplifySelect(SDLoc DL, SDValue N0, SDValue N1, SDValue N2);
317 SDValue SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, SDValue N2,
318 SDValue N3, ISD::CondCode CC,
319 bool NotExtCompare = false);
320 SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
321 SDLoc DL, bool foldBooleans = true);
323 bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
324 SDValue &CC) const;
325 bool isOneUseSetCC(SDValue N) const;
327 SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
328 unsigned HiOp);
329 SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
330 SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
331 SDValue BuildSDIV(SDNode *N);
332 SDValue BuildSDIVPow2(SDNode *N);
333 SDValue BuildUDIV(SDNode *N);
334 SDValue BuildReciprocalEstimate(SDValue Op);
335 SDValue BuildRsqrtEstimate(SDValue Op);
336 SDValue BuildRsqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations);
337 SDValue BuildRsqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations);
338 SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
339 bool DemandHighBits = true);
340 SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
341 SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
342 SDValue InnerPos, SDValue InnerNeg,
343 unsigned PosOpcode, unsigned NegOpcode,
344 SDLoc DL);
345 SDNode *MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL);
346 SDValue ReduceLoadWidth(SDNode *N);
347 SDValue ReduceLoadOpStoreWidth(SDNode *N);
348 SDValue TransformFPLoadStorePair(SDNode *N);
349 SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
350 SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);
352 SDValue GetDemandedBits(SDValue V, const APInt &Mask);
354 /// Walk up chain skipping non-aliasing memory nodes,
355 /// looking for aliasing nodes and adding them to the Aliases vector.
356 void GatherAllAliases(SDNode *N, SDValue OriginalChain,
357 SmallVectorImpl<SDValue> &Aliases);
359 /// Return true if there is any possibility that the two addresses overlap.
360 bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const;
362 /// Walk up chain skipping non-aliasing memory nodes, looking for a better
363 /// chain (aliasing node.)
364 SDValue FindBetterChain(SDNode *N, SDValue Chain);
366 /// Merge consecutive store operations into a wide store.
367 /// This optimization uses wide integers or vectors when possible.
368 /// \return True if some memory operations were changed.
369 bool MergeConsecutiveStores(StoreSDNode *N);
371 /// \brief Try to transform a truncation where C is a constant:
372 /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
373 ///
374 /// \p N needs to be a truncation and its first operand an AND. Other
375 /// requirements are checked by the function (e.g. that trunc is
376 /// single-use) and if missed an empty SDValue is returned.
377 SDValue distributeTruncateThroughAnd(SDNode *N);
379 public:
380 DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
381 : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
382 OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {
383 AttributeSet FnAttrs =
384 DAG.getMachineFunction().getFunction()->getAttributes();
385 ForCodeSize =
386 FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
387 Attribute::OptimizeForSize) ||
388 FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
389 }
391 /// Runs the dag combiner on all nodes in the work list
392 void Run(CombineLevel AtLevel);
394 SelectionDAG &getDAG() const { return DAG; }
396 /// Returns a type large enough to hold any valid shift amount - before type
397 /// legalization these can be huge.
398 EVT getShiftAmountTy(EVT LHSTy) {
399 assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
400 if (LHSTy.isVector())
401 return LHSTy;
402 return LegalTypes ? TLI.getScalarShiftAmountTy(LHSTy)
403 : TLI.getPointerTy();
404 }
406 /// This method returns true if we are running before type legalization or
407 /// if the specified VT is legal.
408 bool isTypeLegal(const EVT &VT) {
409 if (!LegalTypes) return true;
410 return TLI.isTypeLegal(VT);
411 }
413 /// Convenience wrapper around TargetLowering::getSetCCResultType
414 EVT getSetCCResultType(EVT VT) const {
415 return TLI.getSetCCResultType(*DAG.getContext(), VT);
416 }
417 };
418 }
421 namespace {
422 /// This class is a DAGUpdateListener that removes any deleted
423 /// nodes from the worklist.
424 class WorklistRemover : public SelectionDAG::DAGUpdateListener {
425 DAGCombiner &DC;
426 public:
427 explicit WorklistRemover(DAGCombiner &dc)
428 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
430 void NodeDeleted(SDNode *N, SDNode *E) override {
431 DC.removeFromWorklist(N);
432 }
433 };
434 }
436 //===----------------------------------------------------------------------===//
437 // TargetLowering::DAGCombinerInfo implementation
438 //===----------------------------------------------------------------------===//
440 void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
441 ((DAGCombiner*)DC)->AddToWorklist(N);
442 }
444 void TargetLowering::DAGCombinerInfo::RemoveFromWorklist(SDNode *N) {
445 ((DAGCombiner*)DC)->removeFromWorklist(N);
446 }
448 SDValue TargetLowering::DAGCombinerInfo::
449 CombineTo(SDNode *N, const std::vector<SDValue> &To, bool AddTo) {
450 return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
451 }
453 SDValue TargetLowering::DAGCombinerInfo::
454 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
455 return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
456 }
459 SDValue TargetLowering::DAGCombinerInfo::
460 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
461 return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
462 }
464 void TargetLowering::DAGCombinerInfo::
465 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
466 return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
467 }
469 //===----------------------------------------------------------------------===//
470 // Helper Functions
471 //===----------------------------------------------------------------------===//
473 void DAGCombiner::deleteAndRecombine(SDNode *N) {
474 removeFromWorklist(N);
476 // If the operands of this node are only used by the node, they will now be
477 // dead. Make sure to re-visit them and recursively delete dead nodes.
478 for (const SDValue &Op : N->ops())
479 // For an operand generating multiple values, one of the values may
480 // become dead allowing further simplification (e.g. split index
481 // arithmetic from an indexed load).
482 if (Op->hasOneUse() || Op->getNumValues() > 1)
483 AddToWorklist(Op.getNode());
485 DAG.DeleteNode(N);
486 }
488 /// Return 1 if we can compute the negated form of the specified expression for
489 /// the same cost as the expression itself, or 2 if we can compute the negated
490 /// form more cheaply than the expression itself.
491 static char isNegatibleForFree(SDValue Op, bool LegalOperations,
492 const TargetLowering &TLI,
493 const TargetOptions *Options,
494 unsigned Depth = 0) {
495 // fneg is removable even if it has multiple uses.
496 if (Op.getOpcode() == ISD::FNEG) return 2;
498 // Don't allow anything with multiple uses.
499 if (!Op.hasOneUse()) return 0;
501 // Don't recurse exponentially.
502 if (Depth > 6) return 0;
504 switch (Op.getOpcode()) {
505 default: return false;
506 case ISD::ConstantFP:
507 // Don't invert constant FP values after legalize. The negated constant
508 // isn't necessarily legal.
509 return LegalOperations ? 0 : 1;
510 case ISD::FADD:
511 // FIXME: determine better conditions for this xform.
512 if (!Options->UnsafeFPMath) return 0;
514 // After operation legalization, it might not be legal to create new FSUBs.
515 if (LegalOperations &&
516 !TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType()))
517 return 0;
519 // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
520 if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
521 Options, Depth + 1))
522 return V;
523 // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
524 return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
525 Depth + 1);
526 case ISD::FSUB:
527 // We can't turn -(A-B) into B-A when we honor signed zeros.
528 if (!Options->UnsafeFPMath) return 0;
530 // fold (fneg (fsub A, B)) -> (fsub B, A)
531 return 1;
533 case ISD::FMUL:
534 case ISD::FDIV:
535 if (Options->HonorSignDependentRoundingFPMath()) return 0;
537 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
538 if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
539 Options, Depth + 1))
540 return V;
542 return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
543 Depth + 1);
545 case ISD::FP_EXTEND:
546 case ISD::FP_ROUND:
547 case ISD::FSIN:
548 return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
549 Depth + 1);
550 }
551 }
553 /// If isNegatibleForFree returns true, return the newly negated expression.
554 static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
555 bool LegalOperations, unsigned Depth = 0) {
556 const TargetOptions &Options = DAG.getTarget().Options;
557 // fneg is removable even if it has multiple uses.
558 if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);
560 // Don't allow anything with multiple uses.
561 assert(Op.hasOneUse() && "Unknown reuse!");
563 assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
564 switch (Op.getOpcode()) {
565 default: llvm_unreachable("Unknown code");
566 case ISD::ConstantFP: {
567 APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
568 V.changeSign();
569 return DAG.getConstantFP(V, Op.getValueType());
570 }
571 case ISD::FADD:
572 // FIXME: determine better conditions for this xform.
573 assert(Options.UnsafeFPMath);
575 // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
576 if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
577 DAG.getTargetLoweringInfo(), &Options, Depth+1))
578 return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
579 GetNegatedExpression(Op.getOperand(0), DAG,
580 LegalOperations, Depth+1),
581 Op.getOperand(1));
582 // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
583 return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
584 GetNegatedExpression(Op.getOperand(1), DAG,
585 LegalOperations, Depth+1),
586 Op.getOperand(0));
587 case ISD::FSUB:
588 // We can't turn -(A-B) into B-A when we honor signed zeros.
589 assert(Options.UnsafeFPMath);
591 // fold (fneg (fsub 0, B)) -> B
592 if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
593 if (N0CFP->getValueAPF().isZero())
594 return Op.getOperand(1);
596 // fold (fneg (fsub A, B)) -> (fsub B, A)
597 return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
598 Op.getOperand(1), Op.getOperand(0));
600 case ISD::FMUL:
601 case ISD::FDIV:
602 assert(!Options.HonorSignDependentRoundingFPMath());
604 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
605 if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
606 DAG.getTargetLoweringInfo(), &Options, Depth+1))
607 return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
608 GetNegatedExpression(Op.getOperand(0), DAG,
609 LegalOperations, Depth+1),
610 Op.getOperand(1));
612 // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
613 return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
614 Op.getOperand(0),
615 GetNegatedExpression(Op.getOperand(1), DAG,
616 LegalOperations, Depth+1));
618 case ISD::FP_EXTEND:
619 case ISD::FSIN:
620 return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
621 GetNegatedExpression(Op.getOperand(0), DAG,
622 LegalOperations, Depth+1));
623 case ISD::FP_ROUND:
624 return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
625 GetNegatedExpression(Op.getOperand(0), DAG,
626 LegalOperations, Depth+1),
627 Op.getOperand(1));
628 }
629 }
631 // Return true if this node is a setcc, or is a select_cc
632 // that selects between the target values used for true and false, making it
633 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
634 // the appropriate nodes based on the type of node we are checking. This
635 // simplifies life a bit for the callers.
636 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
637 SDValue &CC) const {
638 if (N.getOpcode() == ISD::SETCC) {
639 LHS = N.getOperand(0);
640 RHS = N.getOperand(1);
641 CC = N.getOperand(2);
642 return true;
643 }
645 if (N.getOpcode() != ISD::SELECT_CC ||
646 !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
647 !TLI.isConstFalseVal(N.getOperand(3).getNode()))
648 return false;
650 if (TLI.getBooleanContents(N.getValueType()) ==
651 TargetLowering::UndefinedBooleanContent)
652 return false;
654 LHS = N.getOperand(0);
655 RHS = N.getOperand(1);
656 CC = N.getOperand(4);
657 return true;
658 }
660 /// Return true if this is a SetCC-equivalent operation with only one use.
661 /// If this is true, it allows the users to invert the operation for free when
662 /// it is profitable to do so.
663 bool DAGCombiner::isOneUseSetCC(SDValue N) const {
664 SDValue N0, N1, N2;
665 if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
666 return true;
667 return false;
668 }
670 /// Returns true if N is a BUILD_VECTOR node whose
671 /// elements are all the same constant or undefined.
672 static bool isConstantSplatVector(SDNode *N, APInt& SplatValue) {
673 BuildVectorSDNode *C = dyn_cast<BuildVectorSDNode>(N);
674 if (!C)
675 return false;
677 APInt SplatUndef;
678 unsigned SplatBitSize;
679 bool HasAnyUndefs;
680 EVT EltVT = N->getValueType(0).getVectorElementType();
681 return (C->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
682 HasAnyUndefs) &&
683 EltVT.getSizeInBits() >= SplatBitSize);
684 }
686 // \brief Returns the SDNode if it is a constant BuildVector or constant.
687 static SDNode *isConstantBuildVectorOrConstantInt(SDValue N) {
688 if (isa<ConstantSDNode>(N))
689 return N.getNode();
690 BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
691 if (BV && BV->isConstant())
692 return BV;
693 return nullptr;
694 }
696 // \brief Returns the SDNode if it is a constant splat BuildVector or constant
697 // int.
698 static ConstantSDNode *isConstOrConstSplat(SDValue N) {
699 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N))
700 return CN;
702 if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
703 BitVector UndefElements;
704 ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements);
706 // BuildVectors can truncate their operands. Ignore that case here.
707 // FIXME: We blindly ignore splats which include undef which is overly
708 // pessimistic.
709 if (CN && UndefElements.none() &&
710 CN->getValueType(0) == N.getValueType().getScalarType())
711 return CN;
712 }
714 return nullptr;
715 }
717 // \brief Returns the SDNode if it is a constant splat BuildVector or constant
718 // float.
719 static ConstantFPSDNode *isConstOrConstSplatFP(SDValue N) {
720 if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
721 return CN;
723 if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
724 BitVector UndefElements;
725 ConstantFPSDNode *CN = BV->getConstantFPSplatNode(&UndefElements);
727 if (CN && UndefElements.none())
728 return CN;
729 }
731 return nullptr;
732 }
734 SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL,
735 SDValue N0, SDValue N1) {
736 EVT VT = N0.getValueType();
737 if (N0.getOpcode() == Opc) {
738 if (SDNode *L = isConstantBuildVectorOrConstantInt(N0.getOperand(1))) {
739 if (SDNode *R = isConstantBuildVectorOrConstantInt(N1)) {
740 // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
741 if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, L, R))
742 return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
743 return SDValue();
744 }
745 if (N0.hasOneUse()) {
746 // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one
747 // use
748 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
749 if (!OpNode.getNode())
750 return SDValue();
751 AddToWorklist(OpNode.getNode());
752 return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
753 }
754 }
755 }
757 if (N1.getOpcode() == Opc) {
758 if (SDNode *R = isConstantBuildVectorOrConstantInt(N1.getOperand(1))) {
759 if (SDNode *L = isConstantBuildVectorOrConstantInt(N0)) {
760 // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
761 if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, R, L))
762 return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
763 return SDValue();
764 }
765 if (N1.hasOneUse()) {
766 // reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one
767 // use
768 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N1.getOperand(0), N0);
769 if (!OpNode.getNode())
770 return SDValue();
771 AddToWorklist(OpNode.getNode());
772 return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
773 }
774 }
775 }
777 return SDValue();
778 }
780 SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
781 bool AddTo) {
782 assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
783 ++NodesCombined;
784 DEBUG(dbgs() << "\nReplacing.1 ";
785 N->dump(&DAG);
786 dbgs() << "\nWith: ";
787 To[0].getNode()->dump(&DAG);
788 dbgs() << " and " << NumTo-1 << " other values\n");
789 for (unsigned i = 0, e = NumTo; i != e; ++i)
790 assert((!To[i].getNode() ||
791 N->getValueType(i) == To[i].getValueType()) &&
792 "Cannot combine value to value of different type!");
794 WorklistRemover DeadNodes(*this);
795 DAG.ReplaceAllUsesWith(N, To);
796 if (AddTo) {
797 // Push the new nodes and any users onto the worklist
798 for (unsigned i = 0, e = NumTo; i != e; ++i) {
799 if (To[i].getNode()) {
800 AddToWorklist(To[i].getNode());
801 AddUsersToWorklist(To[i].getNode());
802 }
803 }
804 }
806 // Finally, if the node is now dead, remove it from the graph. The node
807 // may not be dead if the replacement process recursively simplified to
808 // something else needing this node.
809 if (N->use_empty())
810 deleteAndRecombine(N);
811 return SDValue(N, 0);
812 }
814 void DAGCombiner::
815 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
816 // Replace all uses. If any nodes become isomorphic to other nodes and
817 // are deleted, make sure to remove them from our worklist.
818 WorklistRemover DeadNodes(*this);
819 DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
821 // Push the new node and any (possibly new) users onto the worklist.
822 AddToWorklist(TLO.New.getNode());
823 AddUsersToWorklist(TLO.New.getNode());
825 // Finally, if the node is now dead, remove it from the graph. The node
826 // may not be dead if the replacement process recursively simplified to
827 // something else needing this node.
828 if (TLO.Old.getNode()->use_empty())
829 deleteAndRecombine(TLO.Old.getNode());
830 }
832 /// Check the specified integer node value to see if it can be simplified or if
833 /// things it uses can be simplified by bit propagation. If so, return true.
834 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
835 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
836 APInt KnownZero, KnownOne;
837 if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO))
838 return false;
840 // Revisit the node.
841 AddToWorklist(Op.getNode());
843 // Replace the old value with the new one.
844 ++NodesCombined;
845 DEBUG(dbgs() << "\nReplacing.2 ";
846 TLO.Old.getNode()->dump(&DAG);
847 dbgs() << "\nWith: ";
848 TLO.New.getNode()->dump(&DAG);
849 dbgs() << '\n');
851 CommitTargetLoweringOpt(TLO);
852 return true;
853 }
855 void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
856 SDLoc dl(Load);
857 EVT VT = Load->getValueType(0);
858 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, VT, SDValue(ExtLoad, 0));
860 DEBUG(dbgs() << "\nReplacing.9 ";
861 Load->dump(&DAG);
862 dbgs() << "\nWith: ";
863 Trunc.getNode()->dump(&DAG);
864 dbgs() << '\n');
865 WorklistRemover DeadNodes(*this);
866 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
867 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
868 deleteAndRecombine(Load);
869 AddToWorklist(Trunc.getNode());
870 }
872 SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
873 Replace = false;
874 SDLoc dl(Op);
875 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) {
876 EVT MemVT = LD->getMemoryVT();
877 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
878 ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
879 : ISD::EXTLOAD)
880 : LD->getExtensionType();
881 Replace = true;
882 return DAG.getExtLoad(ExtType, dl, PVT,
883 LD->getChain(), LD->getBasePtr(),
884 MemVT, LD->getMemOperand());
885 }
887 unsigned Opc = Op.getOpcode();
888 switch (Opc) {
889 default: break;
890 case ISD::AssertSext:
891 return DAG.getNode(ISD::AssertSext, dl, PVT,
892 SExtPromoteOperand(Op.getOperand(0), PVT),
893 Op.getOperand(1));
894 case ISD::AssertZext:
895 return DAG.getNode(ISD::AssertZext, dl, PVT,
896 ZExtPromoteOperand(Op.getOperand(0), PVT),
897 Op.getOperand(1));
898 case ISD::Constant: {
899 unsigned ExtOpc =
900 Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
901 return DAG.getNode(ExtOpc, dl, PVT, Op);
902 }
903 }
905 if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
906 return SDValue();
907 return DAG.getNode(ISD::ANY_EXTEND, dl, PVT, Op);
908 }
910 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
911 if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
912 return SDValue();
913 EVT OldVT = Op.getValueType();
914 SDLoc dl(Op);
915 bool Replace = false;
916 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
917 if (!NewOp.getNode())
918 return SDValue();
919 AddToWorklist(NewOp.getNode());
921 if (Replace)
922 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
923 return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NewOp.getValueType(), NewOp,
924 DAG.getValueType(OldVT));
925 }
927 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
928 EVT OldVT = Op.getValueType();
929 SDLoc dl(Op);
930 bool Replace = false;
931 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
932 if (!NewOp.getNode())
933 return SDValue();
934 AddToWorklist(NewOp.getNode());
936 if (Replace)
937 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
938 return DAG.getZeroExtendInReg(NewOp, dl, OldVT);
939 }
941 /// Promote the specified integer binary operation if the target indicates it is
942 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
943 /// i32 since i16 instructions are longer.
944 SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
945 if (!LegalOperations)
946 return SDValue();
948 EVT VT = Op.getValueType();
949 if (VT.isVector() || !VT.isInteger())
950 return SDValue();
952 // If operation type is 'undesirable', e.g. i16 on x86, consider
953 // promoting it.
954 unsigned Opc = Op.getOpcode();
955 if (TLI.isTypeDesirableForOp(Opc, VT))
956 return SDValue();
958 EVT PVT = VT;
959 // Consult target whether it is a good idea to promote this operation and
960 // what's the right type to promote it to.
961 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
962 assert(PVT != VT && "Don't know what type to promote to!");
964 bool Replace0 = false;
965 SDValue N0 = Op.getOperand(0);
966 SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
967 if (!NN0.getNode())
968 return SDValue();
970 bool Replace1 = false;
971 SDValue N1 = Op.getOperand(1);
972 SDValue NN1;
973 if (N0 == N1)
974 NN1 = NN0;
975 else {
976 NN1 = PromoteOperand(N1, PVT, Replace1);
977 if (!NN1.getNode())
978 return SDValue();
979 }
981 AddToWorklist(NN0.getNode());
982 if (NN1.getNode())
983 AddToWorklist(NN1.getNode());
985 if (Replace0)
986 ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
987 if (Replace1)
988 ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
990 DEBUG(dbgs() << "\nPromoting ";
991 Op.getNode()->dump(&DAG));
992 SDLoc dl(Op);
993 return DAG.getNode(ISD::TRUNCATE, dl, VT,
994 DAG.getNode(Opc, dl, PVT, NN0, NN1));
995 }
996 return SDValue();
997 }
999 /// Promote the specified integer shift operation if the target indicates it is
1000 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1001 /// i32 since i16 instructions are longer.
1002 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1003 if (!LegalOperations)
1004 return SDValue();
1006 EVT VT = Op.getValueType();
1007 if (VT.isVector() || !VT.isInteger())
1008 return SDValue();
1010 // If operation type is 'undesirable', e.g. i16 on x86, consider
1011 // promoting it.
1012 unsigned Opc = Op.getOpcode();
1013 if (TLI.isTypeDesirableForOp(Opc, VT))
1014 return SDValue();
1016 EVT PVT = VT;
1017 // Consult target whether it is a good idea to promote this operation and
1018 // what's the right type to promote it to.
1019 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1020 assert(PVT != VT && "Don't know what type to promote to!");
1022 bool Replace = false;
1023 SDValue N0 = Op.getOperand(0);
1024 if (Opc == ISD::SRA)
1025 N0 = SExtPromoteOperand(Op.getOperand(0), PVT);
1026 else if (Opc == ISD::SRL)
1027 N0 = ZExtPromoteOperand(Op.getOperand(0), PVT);
1028 else
1029 N0 = PromoteOperand(N0, PVT, Replace);
1030 if (!N0.getNode())
1031 return SDValue();
1033 AddToWorklist(N0.getNode());
1034 if (Replace)
1035 ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1037 DEBUG(dbgs() << "\nPromoting ";
1038 Op.getNode()->dump(&DAG));
1039 SDLoc dl(Op);
1040 return DAG.getNode(ISD::TRUNCATE, dl, VT,
1041 DAG.getNode(Opc, dl, PVT, N0, Op.getOperand(1)));
1042 }
1043 return SDValue();
1044 }
1046 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1047 if (!LegalOperations)
1048 return SDValue();
1050 EVT VT = Op.getValueType();
1051 if (VT.isVector() || !VT.isInteger())
1052 return SDValue();
1054 // If operation type is 'undesirable', e.g. i16 on x86, consider
1055 // promoting it.
1056 unsigned Opc = Op.getOpcode();
1057 if (TLI.isTypeDesirableForOp(Opc, VT))
1058 return SDValue();
1060 EVT PVT = VT;
1061 // Consult target whether it is a good idea to promote this operation and
1062 // what's the right type to promote it to.
1063 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1064 assert(PVT != VT && "Don't know what type to promote to!");
1065 // fold (aext (aext x)) -> (aext x)
1066 // fold (aext (zext x)) -> (zext x)
1067 // fold (aext (sext x)) -> (sext x)
1068 DEBUG(dbgs() << "\nPromoting ";
1069 Op.getNode()->dump(&DAG));
1070 return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1071 }
1072 return SDValue();
1073 }
1075 bool DAGCombiner::PromoteLoad(SDValue Op) {
1076 if (!LegalOperations)
1077 return false;
1079 EVT VT = Op.getValueType();
1080 if (VT.isVector() || !VT.isInteger())
1081 return false;
1083 // If operation type is 'undesirable', e.g. i16 on x86, consider
1084 // promoting it.
1085 unsigned Opc = Op.getOpcode();
1086 if (TLI.isTypeDesirableForOp(Opc, VT))
1087 return false;
1089 EVT PVT = VT;
1090 // Consult target whether it is a good idea to promote this operation and
1091 // what's the right type to promote it to.
1092 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1093 assert(PVT != VT && "Don't know what type to promote to!");
1095 SDLoc dl(Op);
1096 SDNode *N = Op.getNode();
1097 LoadSDNode *LD = cast<LoadSDNode>(N);
1098 EVT MemVT = LD->getMemoryVT();
1099 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
1100 ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
1101 : ISD::EXTLOAD)
1102 : LD->getExtensionType();
1103 SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT,
1104 LD->getChain(), LD->getBasePtr(),
1105 MemVT, LD->getMemOperand());
1106 SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD);
1108 DEBUG(dbgs() << "\nPromoting ";
1109 N->dump(&DAG);
1110 dbgs() << "\nTo: ";
1111 Result.getNode()->dump(&DAG);
1112 dbgs() << '\n');
1113 WorklistRemover DeadNodes(*this);
1114 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1115 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1116 deleteAndRecombine(N);
1117 AddToWorklist(Result.getNode());
1118 return true;
1119 }
1120 return false;
1121 }
1123 /// \brief Recursively delete a node which has no uses and any operands for
1124 /// which it is the only use.
1125 ///
1126 /// Note that this both deletes the nodes and removes them from the worklist.
1127 /// It also adds any nodes who have had a user deleted to the worklist as they
1128 /// may now have only one use and subject to other combines.
1129 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1130 if (!N->use_empty())
1131 return false;
1133 SmallSetVector<SDNode *, 16> Nodes;
1134 Nodes.insert(N);
1135 do {
1136 N = Nodes.pop_back_val();
1137 if (!N)
1138 continue;
1140 if (N->use_empty()) {
1141 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
1142 Nodes.insert(N->getOperand(i).getNode());
1144 removeFromWorklist(N);
1145 DAG.DeleteNode(N);
1146 } else {
1147 AddToWorklist(N);
1148 }
1149 } while (!Nodes.empty());
1150 return true;
1151 }
1153 //===----------------------------------------------------------------------===//
1154 // Main DAG Combiner implementation
1155 //===----------------------------------------------------------------------===//
1157 void DAGCombiner::Run(CombineLevel AtLevel) {
1158 // set the instance variables, so that the various visit routines may use it.
1159 Level = AtLevel;
1160 LegalOperations = Level >= AfterLegalizeVectorOps;
1161 LegalTypes = Level >= AfterLegalizeTypes;
1163 // Early exit if this basic block is in an optnone function.
1164 AttributeSet FnAttrs =
1165 DAG.getMachineFunction().getFunction()->getAttributes();
1166 if (FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
1167 Attribute::OptimizeNone))
1168 return;
1170 // Add all the dag nodes to the worklist.
1171 for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
1172 E = DAG.allnodes_end(); I != E; ++I)
1173 AddToWorklist(I);
1175 // Create a dummy node (which is not added to allnodes), that adds a reference
1176 // to the root node, preventing it from being deleted, and tracking any
1177 // changes of the root.
1178 HandleSDNode Dummy(DAG.getRoot());
1180 // while the worklist isn't empty, find a node and
1181 // try and combine it.
1182 while (!WorklistMap.empty()) {
1183 SDNode *N;
1184 // The Worklist holds the SDNodes in order, but it may contain null entries.
1185 do {
1186 N = Worklist.pop_back_val();
1187 } while (!N);
1189 bool GoodWorklistEntry = WorklistMap.erase(N);
1190 (void)GoodWorklistEntry;
1191 assert(GoodWorklistEntry &&
1192 "Found a worklist entry without a corresponding map entry!");
1194 // If N has no uses, it is dead. Make sure to revisit all N's operands once
1195 // N is deleted from the DAG, since they too may now be dead or may have a
1196 // reduced number of uses, allowing other xforms.
1197 if (recursivelyDeleteUnusedNodes(N))
1198 continue;
1200 WorklistRemover DeadNodes(*this);
1202 // If this combine is running after legalizing the DAG, re-legalize any
1203 // nodes pulled off the worklist.
1204 if (Level == AfterLegalizeDAG) {
1205 SmallSetVector<SDNode *, 16> UpdatedNodes;
1206 bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1208 for (SDNode *LN : UpdatedNodes) {
1209 AddToWorklist(LN);
1210 AddUsersToWorklist(LN);
1211 }
1212 if (!NIsValid)
1213 continue;
1214 }
1216 DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1218 // Add any operands of the new node which have not yet been combined to the
1219 // worklist as well. Because the worklist uniques things already, this
1220 // won't repeatedly process the same operand.
1221 CombinedNodes.insert(N);
1222 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
1223 if (!CombinedNodes.count(N->getOperand(i).getNode()))
1224 AddToWorklist(N->getOperand(i).getNode());
1226 SDValue RV = combine(N);
1228 if (!RV.getNode())
1229 continue;
1231 ++NodesCombined;
1233 // If we get back the same node we passed in, rather than a new node or
1234 // zero, we know that the node must have defined multiple values and
1235 // CombineTo was used. Since CombineTo takes care of the worklist
1236 // mechanics for us, we have no work to do in this case.
1237 if (RV.getNode() == N)
1238 continue;
1240 assert(N->getOpcode() != ISD::DELETED_NODE &&
1241 RV.getNode()->getOpcode() != ISD::DELETED_NODE &&
1242 "Node was deleted but visit returned new node!");
1244 DEBUG(dbgs() << " ... into: ";
1245 RV.getNode()->dump(&DAG));
1247 // Transfer debug value.
1248 DAG.TransferDbgValues(SDValue(N, 0), RV);
1249 if (N->getNumValues() == RV.getNode()->getNumValues())
1250 DAG.ReplaceAllUsesWith(N, RV.getNode());
1251 else {
1252 assert(N->getValueType(0) == RV.getValueType() &&
1253 N->getNumValues() == 1 && "Type mismatch");
1254 SDValue OpV = RV;
1255 DAG.ReplaceAllUsesWith(N, &OpV);
1256 }
1258 // Push the new node and any users onto the worklist
1259 AddToWorklist(RV.getNode());
1260 AddUsersToWorklist(RV.getNode());
1262 // Finally, if the node is now dead, remove it from the graph. The node
1263 // may not be dead if the replacement process recursively simplified to
1264 // something else needing this node. This will also take care of adding any
1265 // operands which have lost a user to the worklist.
1266 recursivelyDeleteUnusedNodes(N);
1267 }
1269 // If the root changed (e.g. it was a dead load, update the root).
1270 DAG.setRoot(Dummy.getValue());
1271 DAG.RemoveDeadNodes();
1272 }
1274 SDValue DAGCombiner::visit(SDNode *N) {
1275 switch (N->getOpcode()) {
1276 default: break;
1277 case ISD::TokenFactor: return visitTokenFactor(N);
1278 case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
1279 case ISD::ADD: return visitADD(N);
1280 case ISD::SUB: return visitSUB(N);
1281 case ISD::ADDC: return visitADDC(N);
1282 case ISD::SUBC: return visitSUBC(N);
1283 case ISD::ADDE: return visitADDE(N);
1284 case ISD::SUBE: return visitSUBE(N);
1285 case ISD::MUL: return visitMUL(N);
1286 case ISD::SDIV: return visitSDIV(N);
1287 case ISD::UDIV: return visitUDIV(N);
1288 case ISD::SREM: return visitSREM(N);
1289 case ISD::UREM: return visitUREM(N);
1290 case ISD::MULHU: return visitMULHU(N);
1291 case ISD::MULHS: return visitMULHS(N);
1292 case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
1293 case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
1294 case ISD::SMULO: return visitSMULO(N);
1295 case ISD::UMULO: return visitUMULO(N);
1296 case ISD::SDIVREM: return visitSDIVREM(N);
1297 case ISD::UDIVREM: return visitUDIVREM(N);
1298 case ISD::AND: return visitAND(N);
1299 case ISD::OR: return visitOR(N);
1300 case ISD::XOR: return visitXOR(N);
1301 case ISD::SHL: return visitSHL(N);
1302 case ISD::SRA: return visitSRA(N);
1303 case ISD::SRL: return visitSRL(N);
1304 case ISD::ROTR:
1305 case ISD::ROTL: return visitRotate(N);
1306 case ISD::CTLZ: return visitCTLZ(N);
1307 case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
1308 case ISD::CTTZ: return visitCTTZ(N);
1309 case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
1310 case ISD::CTPOP: return visitCTPOP(N);
1311 case ISD::SELECT: return visitSELECT(N);
1312 case ISD::VSELECT: return visitVSELECT(N);
1313 case ISD::SELECT_CC: return visitSELECT_CC(N);
1314 case ISD::SETCC: return visitSETCC(N);
1315 case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
1316 case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
1317 case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
1318 case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
1319 case ISD::TRUNCATE: return visitTRUNCATE(N);
1320 case ISD::BITCAST: return visitBITCAST(N);
1321 case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
1322 case ISD::FADD: return visitFADD(N);
1323 case ISD::FSUB: return visitFSUB(N);
1324 case ISD::FMUL: return visitFMUL(N);
1325 case ISD::FMA: return visitFMA(N);
1326 case ISD::FDIV: return visitFDIV(N);
1327 case ISD::FREM: return visitFREM(N);
1328 case ISD::FSQRT: return visitFSQRT(N);
1329 case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
1330 case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
1331 case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
1332 case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
1333 case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
1334 case ISD::FP_ROUND: return visitFP_ROUND(N);
1335 case ISD::FP_ROUND_INREG: return visitFP_ROUND_INREG(N);
1336 case ISD::FP_EXTEND: return visitFP_EXTEND(N);
1337 case ISD::FNEG: return visitFNEG(N);
1338 case ISD::FABS: return visitFABS(N);
1339 case ISD::FFLOOR: return visitFFLOOR(N);
1340 case ISD::FMINNUM: return visitFMINNUM(N);
1341 case ISD::FMAXNUM: return visitFMAXNUM(N);
1342 case ISD::FCEIL: return visitFCEIL(N);
1343 case ISD::FTRUNC: return visitFTRUNC(N);
1344 case ISD::BRCOND: return visitBRCOND(N);
1345 case ISD::BR_CC: return visitBR_CC(N);
1346 case ISD::LOAD: return visitLOAD(N);
1347 case ISD::STORE: return visitSTORE(N);
1348 case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
1349 case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1350 case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
1351 case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
1352 case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
1353 case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
1354 case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
1355 case ISD::MLOAD: return visitMLOAD(N);
1356 case ISD::MSTORE: return visitMSTORE(N);
1357 }
1358 return SDValue();
1359 }
1361 SDValue DAGCombiner::combine(SDNode *N) {
1362 SDValue RV = visit(N);
1364 // If nothing happened, try a target-specific DAG combine.
1365 if (!RV.getNode()) {
1366 assert(N->getOpcode() != ISD::DELETED_NODE &&
1367 "Node was deleted but visit returned NULL!");
1369 if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1370 TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1372 // Expose the DAG combiner to the target combiner impls.
1373 TargetLowering::DAGCombinerInfo
1374 DagCombineInfo(DAG, Level, false, this);
1376 RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1377 }
1378 }
1380 // If nothing happened still, try promoting the operation.
1381 if (!RV.getNode()) {
1382 switch (N->getOpcode()) {
1383 default: break;
1384 case ISD::ADD:
1385 case ISD::SUB:
1386 case ISD::MUL:
1387 case ISD::AND:
1388 case ISD::OR:
1389 case ISD::XOR:
1390 RV = PromoteIntBinOp(SDValue(N, 0));
1391 break;
1392 case ISD::SHL:
1393 case ISD::SRA:
1394 case ISD::SRL:
1395 RV = PromoteIntShiftOp(SDValue(N, 0));
1396 break;
1397 case ISD::SIGN_EXTEND:
1398 case ISD::ZERO_EXTEND:
1399 case ISD::ANY_EXTEND:
1400 RV = PromoteExtend(SDValue(N, 0));
1401 break;
1402 case ISD::LOAD:
1403 if (PromoteLoad(SDValue(N, 0)))
1404 RV = SDValue(N, 0);
1405 break;
1406 }
1407 }
1409 // If N is a commutative binary node, try commuting it to enable more
1410 // sdisel CSE.
1411 if (!RV.getNode() && SelectionDAG::isCommutativeBinOp(N->getOpcode()) &&
1412 N->getNumValues() == 1) {
1413 SDValue N0 = N->getOperand(0);
1414 SDValue N1 = N->getOperand(1);
1416 // Constant operands are canonicalized to RHS.
1417 if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) {
1418 SDValue Ops[] = {N1, N0};
1419 SDNode *CSENode;
1420 if (const BinaryWithFlagsSDNode *BinNode =
1421 dyn_cast<BinaryWithFlagsSDNode>(N)) {
1422 CSENode = DAG.getNodeIfExists(
1423 N->getOpcode(), N->getVTList(), Ops, BinNode->hasNoUnsignedWrap(),
1424 BinNode->hasNoSignedWrap(), BinNode->isExact());
1425 } else {
1426 CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops);
1427 }
1428 if (CSENode)
1429 return SDValue(CSENode, 0);
1430 }
1431 }
1433 return RV;
1434 }
1436 /// Given a node, return its input chain if it has one, otherwise return a null
1437 /// sd operand.
1438 static SDValue getInputChainForNode(SDNode *N) {
1439 if (unsigned NumOps = N->getNumOperands()) {
1440 if (N->getOperand(0).getValueType() == MVT::Other)
1441 return N->getOperand(0);
1442 if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1443 return N->getOperand(NumOps-1);
1444 for (unsigned i = 1; i < NumOps-1; ++i)
1445 if (N->getOperand(i).getValueType() == MVT::Other)
1446 return N->getOperand(i);
1447 }
1448 return SDValue();
1449 }
1451 SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
1452 // If N has two operands, where one has an input chain equal to the other,
1453 // the 'other' chain is redundant.
1454 if (N->getNumOperands() == 2) {
1455 if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
1456 return N->getOperand(0);
1457 if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
1458 return N->getOperand(1);
1459 }
1461 SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
1462 SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
1463 SmallPtrSet<SDNode*, 16> SeenOps;
1464 bool Changed = false; // If we should replace this token factor.
1466 // Start out with this token factor.
1467 TFs.push_back(N);
1469 // Iterate through token factors. The TFs grows when new token factors are
1470 // encountered.
1471 for (unsigned i = 0; i < TFs.size(); ++i) {
1472 SDNode *TF = TFs[i];
1474 // Check each of the operands.
1475 for (unsigned i = 0, ie = TF->getNumOperands(); i != ie; ++i) {
1476 SDValue Op = TF->getOperand(i);
1478 switch (Op.getOpcode()) {
1479 case ISD::EntryToken:
1480 // Entry tokens don't need to be added to the list. They are
1481 // rededundant.
1482 Changed = true;
1483 break;
1485 case ISD::TokenFactor:
1486 if (Op.hasOneUse() &&
1487 std::find(TFs.begin(), TFs.end(), Op.getNode()) == TFs.end()) {
1488 // Queue up for processing.
1489 TFs.push_back(Op.getNode());
1490 // Clean up in case the token factor is removed.
1491 AddToWorklist(Op.getNode());
1492 Changed = true;
1493 break;
1494 }
1495 // Fall thru
1497 default:
1498 // Only add if it isn't already in the list.
1499 if (SeenOps.insert(Op.getNode()).second)
1500 Ops.push_back(Op);
1501 else
1502 Changed = true;
1503 break;
1504 }
1505 }
1506 }
1508 SDValue Result;
1510 // If we've change things around then replace token factor.
1511 if (Changed) {
1512 if (Ops.empty()) {
1513 // The entry token is the only possible outcome.
1514 Result = DAG.getEntryNode();
1515 } else {
1516 // New and improved token factor.
1517 Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
1518 }
1520 // Don't add users to work list.
1521 return CombineTo(N, Result, false);
1522 }
1524 return Result;
1525 }
1527 /// MERGE_VALUES can always be eliminated.
1528 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
1529 WorklistRemover DeadNodes(*this);
1530 // Replacing results may cause a different MERGE_VALUES to suddenly
1531 // be CSE'd with N, and carry its uses with it. Iterate until no
1532 // uses remain, to ensure that the node can be safely deleted.
1533 // First add the users of this node to the work list so that they
1534 // can be tried again once they have new operands.
1535 AddUsersToWorklist(N);
1536 do {
1537 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
1538 DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i));
1539 } while (!N->use_empty());
1540 deleteAndRecombine(N);
1541 return SDValue(N, 0); // Return N so it doesn't get rechecked!
1542 }
1544 SDValue DAGCombiner::visitADD(SDNode *N) {
1545 SDValue N0 = N->getOperand(0);
1546 SDValue N1 = N->getOperand(1);
1547 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
1548 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
1549 EVT VT = N0.getValueType();
1551 // fold vector ops
1552 if (VT.isVector()) {
1553 SDValue FoldedVOp = SimplifyVBinOp(N);
1554 if (FoldedVOp.getNode()) return FoldedVOp;
1556 // fold (add x, 0) -> x, vector edition
1557 if (ISD::isBuildVectorAllZeros(N1.getNode()))
1558 return N0;
1559 if (ISD::isBuildVectorAllZeros(N0.getNode()))
1560 return N1;
1561 }
1563 // fold (add x, undef) -> undef
1564 if (N0.getOpcode() == ISD::UNDEF)
1565 return N0;
1566 if (N1.getOpcode() == ISD::UNDEF)
1567 return N1;
1568 // fold (add c1, c2) -> c1+c2
1569 if (N0C && N1C)
1570 return DAG.FoldConstantArithmetic(ISD::ADD, VT, N0C, N1C);
1571 // canonicalize constant to RHS
1572 if (N0C && !N1C)
1573 return DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, N0);
1574 // fold (add x, 0) -> x
1575 if (N1C && N1C->isNullValue())
1576 return N0;
1577 // fold (add Sym, c) -> Sym+c
1578 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
1579 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA) && N1C &&
1580 GA->getOpcode() == ISD::GlobalAddress)
1581 return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
1582 GA->getOffset() +
1583 (uint64_t)N1C->getSExtValue());
1584 // fold ((c1-A)+c2) -> (c1+c2)-A
1585 if (N1C && N0.getOpcode() == ISD::SUB)
1586 if (ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
1587 return DAG.getNode(ISD::SUB, SDLoc(N), VT,
1588 DAG.getConstant(N1C->getAPIntValue()+
1589 N0C->getAPIntValue(), VT),
1590 N0.getOperand(1));
1591 // reassociate add
1592 SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1);
1593 if (RADD.getNode())
1594 return RADD;
1595 // fold ((0-A) + B) -> B-A
1596 if (N0.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N0.getOperand(0)) &&
1597 cast<ConstantSDNode>(N0.getOperand(0))->isNullValue())
1598 return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, N0.getOperand(1));
1599 // fold (A + (0-B)) -> A-B
1600 if (N1.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N1.getOperand(0)) &&
1601 cast<ConstantSDNode>(N1.getOperand(0))->isNullValue())
1602 return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1.getOperand(1));
1603 // fold (A+(B-A)) -> B
1604 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
1605 return N1.getOperand(0);
1606 // fold ((B-A)+A) -> B
1607 if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
1608 return N0.getOperand(0);
1609 // fold (A+(B-(A+C))) to (B-C)
1610 if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
1611 N0 == N1.getOperand(1).getOperand(0))
1612 return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0),
1613 N1.getOperand(1).getOperand(1));
1614 // fold (A+(B-(C+A))) to (B-C)
1615 if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
1616 N0 == N1.getOperand(1).getOperand(1))
1617 return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0),
1618 N1.getOperand(1).getOperand(0));
1619 // fold (A+((B-A)+or-C)) to (B+or-C)
1620 if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
1621 N1.getOperand(0).getOpcode() == ISD::SUB &&
1622 N0 == N1.getOperand(0).getOperand(1))
1623 return DAG.getNode(N1.getOpcode(), SDLoc(N), VT,
1624 N1.getOperand(0).getOperand(0), N1.getOperand(1));
1626 // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
1627 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
1628 SDValue N00 = N0.getOperand(0);
1629 SDValue N01 = N0.getOperand(1);
1630 SDValue N10 = N1.getOperand(0);
1631 SDValue N11 = N1.getOperand(1);
1633 if (isa<ConstantSDNode>(N00) || isa<ConstantSDNode>(N10))
1634 return DAG.getNode(ISD::SUB, SDLoc(N), VT,
1635 DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
1636 DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
1637 }
1639 if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0)))
1640 return SDValue(N, 0);
1642 // fold (a+b) -> (a|b) iff a and b share no bits.
1643 if (VT.isInteger() && !VT.isVector()) {
1644 APInt LHSZero, LHSOne;
1645 APInt RHSZero, RHSOne;
1646 DAG.computeKnownBits(N0, LHSZero, LHSOne);
1648 if (LHSZero.getBoolValue()) {
1649 DAG.computeKnownBits(N1, RHSZero, RHSOne);
1651 // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
1652 // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
1653 if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero){
1654 if (!LegalOperations || TLI.isOperationLegal(ISD::OR, VT))
1655 return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1);
1656 }
1657 }
1658 }
1660 // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
1661 if (N1.getOpcode() == ISD::SHL &&
1662 N1.getOperand(0).getOpcode() == ISD::SUB)
1663 if (ConstantSDNode *C =
1664 dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(0)))
1665 if (C->getAPIntValue() == 0)
1666 return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0,
1667 DAG.getNode(ISD::SHL, SDLoc(N), VT,
1668 N1.getOperand(0).getOperand(1),
1669 N1.getOperand(1)));
1670 if (N0.getOpcode() == ISD::SHL &&
1671 N0.getOperand(0).getOpcode() == ISD::SUB)
1672 if (ConstantSDNode *C =
1673 dyn_cast<ConstantSDNode>(N0.getOperand(0).getOperand(0)))
1674 if (C->getAPIntValue() == 0)
1675 return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1,
1676 DAG.getNode(ISD::SHL, SDLoc(N), VT,
1677 N0.getOperand(0).getOperand(1),
1678 N0.getOperand(1)));
1680 if (N1.getOpcode() == ISD::AND) {
1681 SDValue AndOp0 = N1.getOperand(0);
1682 ConstantSDNode *AndOp1 = dyn_cast<ConstantSDNode>(N1->getOperand(1));
1683 unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
1684 unsigned DestBits = VT.getScalarType().getSizeInBits();
1686 // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
1687 // and similar xforms where the inner op is either ~0 or 0.
1688 if (NumSignBits == DestBits && AndOp1 && AndOp1->isOne()) {
1689 SDLoc DL(N);
1690 return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0);
1691 }
1692 }
1694 // add (sext i1), X -> sub X, (zext i1)
1695 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
1696 N0.getOperand(0).getValueType() == MVT::i1 &&
1697 !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
1698 SDLoc DL(N);
1699 SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
1700 return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
1701 }
1703 // add X, (sextinreg Y i1) -> sub X, (and Y 1)
1704 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
1705 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
1706 if (TN->getVT() == MVT::i1) {
1707 SDLoc DL(N);
1708 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
1709 DAG.getConstant(1, VT));
1710 return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
1711 }
1712 }
1714 return SDValue();
1715 }
1717 SDValue DAGCombiner::visitADDC(SDNode *N) {
1718 SDValue N0 = N->getOperand(0);
1719 SDValue N1 = N->getOperand(1);
1720 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
1721 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
1722 EVT VT = N0.getValueType();
1724 // If the flag result is dead, turn this into an ADD.
1725 if (!N->hasAnyUseOfValue(1))
1726 return CombineTo(N, DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N1),
1727 DAG.getNode(ISD::CARRY_FALSE,
1728 SDLoc(N), MVT::Glue));
1730 // canonicalize constant to RHS.
1731 if (N0C && !N1C)
1732 return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N1, N0);
1734 // fold (addc x, 0) -> x + no carry out
1735 if (N1C && N1C->isNullValue())
1736 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
1737 SDLoc(N), MVT::Glue));
1739 // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
1740 APInt LHSZero, LHSOne;
1741 APInt RHSZero, RHSOne;
1742 DAG.computeKnownBits(N0, LHSZero, LHSOne);
1744 if (LHSZero.getBoolValue()) {
1745 DAG.computeKnownBits(N1, RHSZero, RHSOne);
1747 // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
1748 // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
1749 if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero)
1750 return CombineTo(N, DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1),
1751 DAG.getNode(ISD::CARRY_FALSE,
1752 SDLoc(N), MVT::Glue));
1753 }
1755 return SDValue();
1756 }
1758 SDValue DAGCombiner::visitADDE(SDNode *N) {
1759 SDValue N0 = N->getOperand(0);
1760 SDValue N1 = N->getOperand(1);
1761 SDValue CarryIn = N->getOperand(2);
1762 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
1763 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
1765 // canonicalize constant to RHS
1766 if (N0C && !N1C)
1767 return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
1768 N1, N0, CarryIn);
1770 // fold (adde x, y, false) -> (addc x, y)
1771 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
1772 return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
1774 return SDValue();
1775 }
1777 // Since it may not be valid to emit a fold to zero for vector initializers
1778 // check if we can before folding.
1779 static SDValue tryFoldToZero(SDLoc DL, const TargetLowering &TLI, EVT VT,
1780 SelectionDAG &DAG,
1781 bool LegalOperations, bool LegalTypes) {
1782 if (!VT.isVector())
1783 return DAG.getConstant(0, VT);
1784 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
1785 return DAG.getConstant(0, VT);
1786 return SDValue();
1787 }
1789 SDValue DAGCombiner::visitSUB(SDNode *N) {
1790 SDValue N0 = N->getOperand(0);
1791 SDValue N1 = N->getOperand(1);
1792 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
1793 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
1794 ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? nullptr :
1795 dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode());
1796 EVT VT = N0.getValueType();
1798 // fold vector ops
1799 if (VT.isVector()) {
1800 SDValue FoldedVOp = SimplifyVBinOp(N);
1801 if (FoldedVOp.getNode()) return FoldedVOp;
1803 // fold (sub x, 0) -> x, vector edition
1804 if (ISD::isBuildVectorAllZeros(N1.getNode()))
1805 return N0;
1806 }
1808 // fold (sub x, x) -> 0
1809 // FIXME: Refactor this and xor and other similar operations together.
1810 if (N0 == N1)
1811 return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
1812 // fold (sub c1, c2) -> c1-c2
1813 if (N0C && N1C)
1814 return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C);
1815 // fold (sub x, c) -> (add x, -c)
1816 if (N1C)
1817 return DAG.getNode(ISD::ADD, SDLoc(N), VT, N0,
1818 DAG.getConstant(-N1C->getAPIntValue(), VT));
1819 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
1820 if (N0C && N0C->isAllOnesValue())
1821 return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
1822 // fold A-(A-B) -> B
1823 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
1824 return N1.getOperand(1);
1825 // fold (A+B)-A -> B
1826 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
1827 return N0.getOperand(1);
1828 // fold (A+B)-B -> A
1829 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
1830 return N0.getOperand(0);
1831 // fold C2-(A+C1) -> (C2-C1)-A
1832 if (N1.getOpcode() == ISD::ADD && N0C && N1C1) {
1833 SDValue NewC = DAG.getConstant(N0C->getAPIntValue() - N1C1->getAPIntValue(),
1834 VT);
1835 return DAG.getNode(ISD::SUB, SDLoc(N), VT, NewC,
1836 N1.getOperand(0));
1837 }
1838 // fold ((A+(B+or-C))-B) -> A+or-C
1839 if (N0.getOpcode() == ISD::ADD &&
1840 (N0.getOperand(1).getOpcode() == ISD::SUB ||
1841 N0.getOperand(1).getOpcode() == ISD::ADD) &&
1842 N0.getOperand(1).getOperand(0) == N1)
1843 return DAG.getNode(N0.getOperand(1).getOpcode(), SDLoc(N), VT,
1844 N0.getOperand(0), N0.getOperand(1).getOperand(1));
1845 // fold ((A+(C+B))-B) -> A+C
1846 if (N0.getOpcode() == ISD::ADD &&
1847 N0.getOperand(1).getOpcode() == ISD::ADD &&
1848 N0.getOperand(1).getOperand(1) == N1)
1849 return DAG.getNode(ISD::ADD, SDLoc(N), VT,
1850 N0.getOperand(0), N0.getOperand(1).getOperand(0));
1851 // fold ((A-(B-C))-C) -> A-B
1852 if (N0.getOpcode() == ISD::SUB &&
1853 N0.getOperand(1).getOpcode() == ISD::SUB &&
1854 N0.getOperand(1).getOperand(1) == N1)
1855 return DAG.getNode(ISD::SUB, SDLoc(N), VT,
1856 N0.getOperand(0), N0.getOperand(1).getOperand(0));
1858 // If either operand of a sub is undef, the result is undef
1859 if (N0.getOpcode() == ISD::UNDEF)
1860 return N0;
1861 if (N1.getOpcode() == ISD::UNDEF)
1862 return N1;
1864 // If the relocation model supports it, consider symbol offsets.
1865 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
1866 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
1867 // fold (sub Sym, c) -> Sym-c
1868 if (N1C && GA->getOpcode() == ISD::GlobalAddress)
1869 return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
1870 GA->getOffset() -
1871 (uint64_t)N1C->getSExtValue());
1872 // fold (sub Sym+c1, Sym+c2) -> c1-c2
1873 if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
1874 if (GA->getGlobal() == GB->getGlobal())
1875 return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
1876 VT);
1877 }
1879 // sub X, (sextinreg Y i1) -> add X, (and Y 1)
1880 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
1881 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
1882 if (TN->getVT() == MVT::i1) {
1883 SDLoc DL(N);
1884 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
1885 DAG.getConstant(1, VT));
1886 return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
1887 }
1888 }
1890 return SDValue();
1891 }
1893 SDValue DAGCombiner::visitSUBC(SDNode *N) {
1894 SDValue N0 = N->getOperand(0);
1895 SDValue N1 = N->getOperand(1);
1896 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
1897 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
1898 EVT VT = N0.getValueType();
1900 // If the flag result is dead, turn this into an SUB.
1901 if (!N->hasAnyUseOfValue(1))
1902 return CombineTo(N, DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1),
1903 DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
1904 MVT::Glue));
1906 // fold (subc x, x) -> 0 + no borrow
1907 if (N0 == N1)
1908 return CombineTo(N, DAG.getConstant(0, VT),
1909 DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
1910 MVT::Glue));
1912 // fold (subc x, 0) -> x + no borrow
1913 if (N1C && N1C->isNullValue())
1914 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
1915 MVT::Glue));
1917 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
1918 if (N0C && N0C->isAllOnesValue())
1919 return CombineTo(N, DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0),
1920 DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
1921 MVT::Glue));
1923 return SDValue();
1924 }
1926 SDValue DAGCombiner::visitSUBE(SDNode *N) {
1927 SDValue N0 = N->getOperand(0);
1928 SDValue N1 = N->getOperand(1);
1929 SDValue CarryIn = N->getOperand(2);
1931 // fold (sube x, y, false) -> (subc x, y)
1932 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
1933 return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
1935 return SDValue();
1936 }
1938 SDValue DAGCombiner::visitMUL(SDNode *N) {
1939 SDValue N0 = N->getOperand(0);
1940 SDValue N1 = N->getOperand(1);
1941 EVT VT = N0.getValueType();
1943 // fold (mul x, undef) -> 0
1944 if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
1945 return DAG.getConstant(0, VT);
1947 bool N0IsConst = false;
1948 bool N1IsConst = false;
1949 APInt ConstValue0, ConstValue1;
1950 // fold vector ops
1951 if (VT.isVector()) {
1952 SDValue FoldedVOp = SimplifyVBinOp(N);
1953 if (FoldedVOp.getNode()) return FoldedVOp;
1955 N0IsConst = isConstantSplatVector(N0.getNode(), ConstValue0);
1956 N1IsConst = isConstantSplatVector(N1.getNode(), ConstValue1);
1957 } else {
1958 N0IsConst = dyn_cast<ConstantSDNode>(N0) != nullptr;
1959 ConstValue0 = N0IsConst ? (dyn_cast<ConstantSDNode>(N0))->getAPIntValue()
1960 : APInt();
1961 N1IsConst = dyn_cast<ConstantSDNode>(N1) != nullptr;
1962 ConstValue1 = N1IsConst ? (dyn_cast<ConstantSDNode>(N1))->getAPIntValue()
1963 : APInt();
1964 }
1966 // fold (mul c1, c2) -> c1*c2
1967 if (N0IsConst && N1IsConst)
1968 return DAG.FoldConstantArithmetic(ISD::MUL, VT, N0.getNode(), N1.getNode());
1970 // canonicalize constant to RHS
1971 if (N0IsConst && !N1IsConst)
1972 return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
1973 // fold (mul x, 0) -> 0
1974 if (N1IsConst && ConstValue1 == 0)
1975 return N1;
1976 // We require a splat of the entire scalar bit width for non-contiguous
1977 // bit patterns.
1978 bool IsFullSplat =
1979 ConstValue1.getBitWidth() == VT.getScalarType().getSizeInBits();
1980 // fold (mul x, 1) -> x
1981 if (N1IsConst && ConstValue1 == 1 && IsFullSplat)
1982 return N0;
1983 // fold (mul x, -1) -> 0-x
1984 if (N1IsConst && ConstValue1.isAllOnesValue())
1985 return DAG.getNode(ISD::SUB, SDLoc(N), VT,
1986 DAG.getConstant(0, VT), N0);
1987 // fold (mul x, (1 << c)) -> x << c
1988 if (N1IsConst && ConstValue1.isPowerOf2() && IsFullSplat)
1989 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
1990 DAG.getConstant(ConstValue1.logBase2(),
1991 getShiftAmountTy(N0.getValueType())));
1992 // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
1993 if (N1IsConst && (-ConstValue1).isPowerOf2() && IsFullSplat) {
1994 unsigned Log2Val = (-ConstValue1).logBase2();
1995 // FIXME: If the input is something that is easily negated (e.g. a
1996 // single-use add), we should put the negate there.
1997 return DAG.getNode(ISD::SUB, SDLoc(N), VT,
1998 DAG.getConstant(0, VT),
1999 DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
2000 DAG.getConstant(Log2Val,
2001 getShiftAmountTy(N0.getValueType()))));
2002 }
2004 APInt Val;
2005 // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
2006 if (N1IsConst && N0.getOpcode() == ISD::SHL &&
2007 (isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
2008 isa<ConstantSDNode>(N0.getOperand(1)))) {
2009 SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT,
2010 N1, N0.getOperand(1));
2011 AddToWorklist(C3.getNode());
2012 return DAG.getNode(ISD::MUL, SDLoc(N), VT,
2013 N0.getOperand(0), C3);
2014 }
2016 // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
2017 // use.
2018 {
2019 SDValue Sh(nullptr,0), Y(nullptr,0);
2020 // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
2021 if (N0.getOpcode() == ISD::SHL &&
2022 (isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
2023 isa<ConstantSDNode>(N0.getOperand(1))) &&
2024 N0.getNode()->hasOneUse()) {
2025 Sh = N0; Y = N1;
2026 } else if (N1.getOpcode() == ISD::SHL &&
2027 isa<ConstantSDNode>(N1.getOperand(1)) &&
2028 N1.getNode()->hasOneUse()) {
2029 Sh = N1; Y = N0;
2030 }
2032 if (Sh.getNode()) {
2033 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT,
2034 Sh.getOperand(0), Y);
2035 return DAG.getNode(ISD::SHL, SDLoc(N), VT,
2036 Mul, Sh.getOperand(1));
2037 }
2038 }
2040 // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
2041 if (N1IsConst && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
2042 (isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
2043 isa<ConstantSDNode>(N0.getOperand(1))))
2044 return DAG.getNode(ISD::ADD, SDLoc(N), VT,
2045 DAG.getNode(ISD::MUL, SDLoc(N0), VT,
2046 N0.getOperand(0), N1),
2047 DAG.getNode(ISD::MUL, SDLoc(N1), VT,
2048 N0.getOperand(1), N1));
2050 // reassociate mul
2051 SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1);
2052 if (RMUL.getNode())
2053 return RMUL;
2055 return SDValue();
2056 }
2058 SDValue DAGCombiner::visitSDIV(SDNode *N) {
2059 SDValue N0 = N->getOperand(0);
2060 SDValue N1 = N->getOperand(1);
2061 ConstantSDNode *N0C = isConstOrConstSplat(N0);
2062 ConstantSDNode *N1C = isConstOrConstSplat(N1);
2063 EVT VT = N->getValueType(0);
2065 // fold vector ops
2066 if (VT.isVector()) {
2067 SDValue FoldedVOp = SimplifyVBinOp(N);
2068 if (FoldedVOp.getNode()) return FoldedVOp;
2069 }
2071 // fold (sdiv c1, c2) -> c1/c2
2072 if (N0C && N1C && !N1C->isNullValue())
2073 return DAG.FoldConstantArithmetic(ISD::SDIV, VT, N0C, N1C);
2074 // fold (sdiv X, 1) -> X
2075 if (N1C && N1C->getAPIntValue() == 1LL)
2076 return N0;
2077 // fold (sdiv X, -1) -> 0-X
2078 if (N1C && N1C->isAllOnesValue())
2079 return DAG.getNode(ISD::SUB, SDLoc(N), VT,
2080 DAG.getConstant(0, VT), N0);
2081 // If we know the sign bits of both operands are zero, strength reduce to a
2082 // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
2083 if (!VT.isVector()) {
2084 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
2085 return DAG.getNode(ISD::UDIV, SDLoc(N), N1.getValueType(),
2086 N0, N1);
2087 }
2089 // fold (sdiv X, pow2) -> simple ops after legalize
2090 if (N1C && !N1C->isNullValue() && (N1C->getAPIntValue().isPowerOf2() ||
2091 (-N1C->getAPIntValue()).isPowerOf2())) {
2092 // If dividing by powers of two is cheap, then don't perform the following
2093 // fold.
2094 if (TLI.isPow2SDivCheap())
2095 return SDValue();
2097 // Target-specific implementation of sdiv x, pow2.
2098 SDValue Res = BuildSDIVPow2(N);
2099 if (Res.getNode())
2100 return Res;
2102 unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();
2104 // Splat the sign bit into the register
2105 SDValue SGN =
2106 DAG.getNode(ISD::SRA, SDLoc(N), VT, N0,
2107 DAG.getConstant(VT.getScalarSizeInBits() - 1,
2108 getShiftAmountTy(N0.getValueType())));
2109 AddToWorklist(SGN.getNode());
2111 // Add (N0 < 0) ? abs2 - 1 : 0;
2112 SDValue SRL =
2113 DAG.getNode(ISD::SRL, SDLoc(N), VT, SGN,
2114 DAG.getConstant(VT.getScalarSizeInBits() - lg2,
2115 getShiftAmountTy(SGN.getValueType())));
2116 SDValue ADD = DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, SRL);
2117 AddToWorklist(SRL.getNode());
2118 AddToWorklist(ADD.getNode()); // Divide by pow2
2119 SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), VT, ADD,
2120 DAG.getConstant(lg2, getShiftAmountTy(ADD.getValueType())));
2122 // If we're dividing by a positive value, we're done. Otherwise, we must
2123 // negate the result.
2124 if (N1C->getAPIntValue().isNonNegative())
2125 return SRA;
2127 AddToWorklist(SRA.getNode());
2128 return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), SRA);
2129 }
2131 // if integer divide is expensive and we satisfy the requirements, emit an
2132 // alternate sequence.
2133 if (N1C && !TLI.isIntDivCheap()) {
2134 SDValue Op = BuildSDIV(N);
2135 if (Op.getNode()) return Op;
2136 }
2138 // undef / X -> 0
2139 if (N0.getOpcode() == ISD::UNDEF)
2140 return DAG.getConstant(0, VT);
2141 // X / undef -> undef
2142 if (N1.getOpcode() == ISD::UNDEF)
2143 return N1;
2145 return SDValue();
2146 }
2148 SDValue DAGCombiner::visitUDIV(SDNode *N) {
2149 SDValue N0 = N->getOperand(0);
2150 SDValue N1 = N->getOperand(1);
2151 ConstantSDNode *N0C = isConstOrConstSplat(N0);
2152 ConstantSDNode *N1C = isConstOrConstSplat(N1);
2153 EVT VT = N->getValueType(0);
2155 // fold vector ops
2156 if (VT.isVector()) {
2157 SDValue FoldedVOp = SimplifyVBinOp(N);
2158 if (FoldedVOp.getNode()) return FoldedVOp;
2159 }
2161 // fold (udiv c1, c2) -> c1/c2
2162 if (N0C && N1C && !N1C->isNullValue())
2163 return DAG.FoldConstantArithmetic(ISD::UDIV, VT, N0C, N1C);
2164 // fold (udiv x, (1 << c)) -> x >>u c
2165 if (N1C && N1C->getAPIntValue().isPowerOf2())
2166 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0,
2167 DAG.getConstant(N1C->getAPIntValue().logBase2(),
2168 getShiftAmountTy(N0.getValueType())));
2169 // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
2170 if (N1.getOpcode() == ISD::SHL) {
2171 if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
2172 if (SHC->getAPIntValue().isPowerOf2()) {
2173 EVT ADDVT = N1.getOperand(1).getValueType();
2174 SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N), ADDVT,
2175 N1.getOperand(1),
2176 DAG.getConstant(SHC->getAPIntValue()
2177 .logBase2(),
2178 ADDVT));
2179 AddToWorklist(Add.getNode());
2180 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, Add);
2181 }
2182 }
2183 }
2184 // fold (udiv x, c) -> alternate
2185 if (N1C && !TLI.isIntDivCheap()) {
2186 SDValue Op = BuildUDIV(N);
2187 if (Op.getNode()) return Op;
2188 }
2190 // undef / X -> 0
2191 if (N0.getOpcode() == ISD::UNDEF)
2192 return DAG.getConstant(0, VT);
2193 // X / undef -> undef
2194 if (N1.getOpcode() == ISD::UNDEF)
2195 return N1;
2197 return SDValue();
2198 }
2200 SDValue DAGCombiner::visitSREM(SDNode *N) {
2201 SDValue N0 = N->getOperand(0);
2202 SDValue N1 = N->getOperand(1);
2203 ConstantSDNode *N0C = isConstOrConstSplat(N0);
2204 ConstantSDNode *N1C = isConstOrConstSplat(N1);
2205 EVT VT = N->getValueType(0);
2207 // fold (srem c1, c2) -> c1%c2
2208 if (N0C && N1C && !N1C->isNullValue())
2209 return DAG.FoldConstantArithmetic(ISD::SREM, VT, N0C, N1C);
2210 // If we know the sign bits of both operands are zero, strength reduce to a
2211 // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
2212 if (!VT.isVector()) {
2213 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
2214 return DAG.getNode(ISD::UREM, SDLoc(N), VT, N0, N1);
2215 }
2217 // If X/C can be simplified by the division-by-constant logic, lower
2218 // X%C to the equivalent of X-X/C*C.
2219 if (N1C && !N1C->isNullValue()) {
2220 SDValue Div = DAG.getNode(ISD::SDIV, SDLoc(N), VT, N0, N1);
2221 AddToWorklist(Div.getNode());
2222 SDValue OptimizedDiv = combine(Div.getNode());
2223 if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
2224 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT,
2225 OptimizedDiv, N1);
2226 SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul);
2227 AddToWorklist(Mul.getNode());
2228 return Sub;
2229 }
2230 }
2232 // undef % X -> 0
2233 if (N0.getOpcode() == ISD::UNDEF)
2234 return DAG.getConstant(0, VT);
2235 // X % undef -> undef
2236 if (N1.getOpcode() == ISD::UNDEF)
2237 return N1;
2239 return SDValue();
2240 }
2242 SDValue DAGCombiner::visitUREM(SDNode *N) {
2243 SDValue N0 = N->getOperand(0);
2244 SDValue N1 = N->getOperand(1);
2245 ConstantSDNode *N0C = isConstOrConstSplat(N0);
2246 ConstantSDNode *N1C = isConstOrConstSplat(N1);
2247 EVT VT = N->getValueType(0);
2249 // fold (urem c1, c2) -> c1%c2
2250 if (N0C && N1C && !N1C->isNullValue())
2251 return DAG.FoldConstantArithmetic(ISD::UREM, VT, N0C, N1C);
2252 // fold (urem x, pow2) -> (and x, pow2-1)
2253 if (N1C && !N1C->isNullValue() && N1C->getAPIntValue().isPowerOf2())
2254 return DAG.getNode(ISD::AND, SDLoc(N), VT, N0,
2255 DAG.getConstant(N1C->getAPIntValue()-1,VT));
2256 // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
2257 if (N1.getOpcode() == ISD::SHL) {
2258 if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
2259 if (SHC->getAPIntValue().isPowerOf2()) {
2260 SDValue Add =
2261 DAG.getNode(ISD::ADD, SDLoc(N), VT, N1,
2262 DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()),
2263 VT));
2264 AddToWorklist(Add.getNode());
2265 return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, Add);
2266 }
2267 }
2268 }
2270 // If X/C can be simplified by the division-by-constant logic, lower
2271 // X%C to the equivalent of X-X/C*C.
2272 if (N1C && !N1C->isNullValue()) {
2273 SDValue Div = DAG.getNode(ISD::UDIV, SDLoc(N), VT, N0, N1);
2274 AddToWorklist(Div.getNode());
2275 SDValue OptimizedDiv = combine(Div.getNode());
2276 if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
2277 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT,
2278 OptimizedDiv, N1);
2279 SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul);
2280 AddToWorklist(Mul.getNode());
2281 return Sub;
2282 }
2283 }
2285 // undef % X -> 0
2286 if (N0.getOpcode() == ISD::UNDEF)
2287 return DAG.getConstant(0, VT);
2288 // X % undef -> undef
2289 if (N1.getOpcode() == ISD::UNDEF)
2290 return N1;
2292 return SDValue();
2293 }
2295 SDValue DAGCombiner::visitMULHS(SDNode *N) {
2296 SDValue N0 = N->getOperand(0);
2297 SDValue N1 = N->getOperand(1);
2298 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2299 EVT VT = N->getValueType(0);
2300 SDLoc DL(N);
2302 // fold (mulhs x, 0) -> 0
2303 if (N1C && N1C->isNullValue())
2304 return N1;
2305 // fold (mulhs x, 1) -> (sra x, size(x)-1)
2306 if (N1C && N1C->getAPIntValue() == 1)
2307 return DAG.getNode(ISD::SRA, SDLoc(N), N0.getValueType(), N0,
2308 DAG.getConstant(N0.getValueType().getSizeInBits() - 1,
2309 getShiftAmountTy(N0.getValueType())));
2310 // fold (mulhs x, undef) -> 0
2311 if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
2312 return DAG.getConstant(0, VT);
2314 // If the type twice as wide is legal, transform the mulhs to a wider multiply
2315 // plus a shift.
2316 if (VT.isSimple() && !VT.isVector()) {
2317 MVT Simple = VT.getSimpleVT();
2318 unsigned SimpleSize = Simple.getSizeInBits();
2319 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
2320 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
2321 N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
2322 N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
2323 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
2324 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
2325 DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType())));
2326 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
2327 }
2328 }
2330 return SDValue();
2331 }
2333 SDValue DAGCombiner::visitMULHU(SDNode *N) {
2334 SDValue N0 = N->getOperand(0);
2335 SDValue N1 = N->getOperand(1);
2336 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2337 EVT VT = N->getValueType(0);
2338 SDLoc DL(N);
2340 // fold (mulhu x, 0) -> 0
2341 if (N1C && N1C->isNullValue())
2342 return N1;
2343 // fold (mulhu x, 1) -> 0
2344 if (N1C && N1C->getAPIntValue() == 1)
2345 return DAG.getConstant(0, N0.getValueType());
2346 // fold (mulhu x, undef) -> 0
2347 if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
2348 return DAG.getConstant(0, VT);
2350 // If the type twice as wide is legal, transform the mulhu to a wider multiply
2351 // plus a shift.
2352 if (VT.isSimple() && !VT.isVector()) {
2353 MVT Simple = VT.getSimpleVT();
2354 unsigned SimpleSize = Simple.getSizeInBits();
2355 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
2356 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
2357 N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
2358 N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
2359 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
2360 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
2361 DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType())));
2362 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
2363 }
2364 }
2366 return SDValue();
2367 }
2369 /// Perform optimizations common to nodes that compute two values. LoOp and HiOp
2370 /// give the opcodes for the two computations that are being performed. Return
2371 /// true if a simplification was made.
2372 SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
2373 unsigned HiOp) {
2374 // If the high half is not needed, just compute the low half.
2375 bool HiExists = N->hasAnyUseOfValue(1);
2376 if (!HiExists &&
2377 (!LegalOperations ||
2378 TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
2379 SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
2380 return CombineTo(N, Res, Res);
2381 }
2383 // If the low half is not needed, just compute the high half.
2384 bool LoExists = N->hasAnyUseOfValue(0);
2385 if (!LoExists &&
2386 (!LegalOperations ||
2387 TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
2388 SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
2389 return CombineTo(N, Res, Res);
2390 }
2392 // If both halves are used, return as it is.
2393 if (LoExists && HiExists)
2394 return SDValue();
2396 // If the two computed results can be simplified separately, separate them.
2397 if (LoExists) {
2398 SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
2399 AddToWorklist(Lo.getNode());
2400 SDValue LoOpt = combine(Lo.getNode());
2401 if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
2402 (!LegalOperations ||
2403 TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType())))
2404 return CombineTo(N, LoOpt, LoOpt);
2405 }
2407 if (HiExists) {
2408 SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
2409 AddToWorklist(Hi.getNode());
2410 SDValue HiOpt = combine(Hi.getNode());
2411 if (HiOpt.getNode() && HiOpt != Hi &&
2412 (!LegalOperations ||
2413 TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType())))
2414 return CombineTo(N, HiOpt, HiOpt);
2415 }
2417 return SDValue();
2418 }
2420 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
2421 SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS);
2422 if (Res.getNode()) return Res;
2424 EVT VT = N->getValueType(0);
2425 SDLoc DL(N);
2427 // If the type twice as wide is legal, transform the mulhu to a wider multiply
2428 // plus a shift.
2429 if (VT.isSimple() && !VT.isVector()) {
2430 MVT Simple = VT.getSimpleVT();
2431 unsigned SimpleSize = Simple.getSizeInBits();
2432 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
2433 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
2434 SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
2435 SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
2436 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
2437 // Compute the high part as N1.
2438 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
2439 DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType())));
2440 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
2441 // Compute the low part as N0.
2442 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
2443 return CombineTo(N, Lo, Hi);
2444 }
2445 }
2447 return SDValue();
2448 }
2450 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
2451 SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU);
2452 if (Res.getNode()) return Res;
2454 EVT VT = N->getValueType(0);
2455 SDLoc DL(N);
2457 // If the type twice as wide is legal, transform the mulhu to a wider multiply
2458 // plus a shift.
2459 if (VT.isSimple() && !VT.isVector()) {
2460 MVT Simple = VT.getSimpleVT();
2461 unsigned SimpleSize = Simple.getSizeInBits();
2462 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
2463 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
2464 SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
2465 SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
2466 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
2467 // Compute the high part as N1.
2468 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
2469 DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType())));
2470 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
2471 // Compute the low part as N0.
2472 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
2473 return CombineTo(N, Lo, Hi);
2474 }
2475 }
2477 return SDValue();
2478 }
2480 SDValue DAGCombiner::visitSMULO(SDNode *N) {
2481 // (smulo x, 2) -> (saddo x, x)
2482 if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
2483 if (C2->getAPIntValue() == 2)
2484 return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(),
2485 N->getOperand(0), N->getOperand(0));
2487 return SDValue();
2488 }
2490 SDValue DAGCombiner::visitUMULO(SDNode *N) {
2491 // (umulo x, 2) -> (uaddo x, x)
2492 if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
2493 if (C2->getAPIntValue() == 2)
2494 return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(),
2495 N->getOperand(0), N->getOperand(0));
2497 return SDValue();
2498 }
2500 SDValue DAGCombiner::visitSDIVREM(SDNode *N) {
2501 SDValue Res = SimplifyNodeWithTwoResults(N, ISD::SDIV, ISD::SREM);
2502 if (Res.getNode()) return Res;
2504 return SDValue();
2505 }
2507 SDValue DAGCombiner::visitUDIVREM(SDNode *N) {
2508 SDValue Res = SimplifyNodeWithTwoResults(N, ISD::UDIV, ISD::UREM);
2509 if (Res.getNode()) return Res;
2511 return SDValue();
2512 }
2514 /// If this is a binary operator with two operands of the same opcode, try to
2515 /// simplify it.
2516 SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
2517 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
2518 EVT VT = N0.getValueType();
2519 assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");
2521 // Bail early if none of these transforms apply.
2522 if (N0.getNode()->getNumOperands() == 0) return SDValue();
2524 // For each of OP in AND/OR/XOR:
2525 // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
2526 // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
2527 // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
2528 // fold (OP (bswap x), (bswap y)) -> (bswap (OP x, y))
2529 // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
2530 //
2531 // do not sink logical op inside of a vector extend, since it may combine
2532 // into a vsetcc.
2533 EVT Op0VT = N0.getOperand(0).getValueType();
2534 if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
2535 N0.getOpcode() == ISD::SIGN_EXTEND ||
2536 N0.getOpcode() == ISD::BSWAP ||
2537 // Avoid infinite looping with PromoteIntBinOp.
2538 (N0.getOpcode() == ISD::ANY_EXTEND &&
2539 (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
2540 (N0.getOpcode() == ISD::TRUNCATE &&
2541 (!TLI.isZExtFree(VT, Op0VT) ||
2542 !TLI.isTruncateFree(Op0VT, VT)) &&
2543 TLI.isTypeLegal(Op0VT))) &&
2544 !VT.isVector() &&
2545 Op0VT == N1.getOperand(0).getValueType() &&
2546 (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
2547 SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
2548 N0.getOperand(0).getValueType(),
2549 N0.getOperand(0), N1.getOperand(0));
2550 AddToWorklist(ORNode.getNode());
2551 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode);
2552 }
2554 // For each of OP in SHL/SRL/SRA/AND...
2555 // fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
2556 // fold (or (OP x, z), (OP y, z)) -> (OP (or x, y), z)
2557 // fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
2558 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
2559 N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
2560 N0.getOperand(1) == N1.getOperand(1)) {
2561 SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
2562 N0.getOperand(0).getValueType(),
2563 N0.getOperand(0), N1.getOperand(0));
2564 AddToWorklist(ORNode.getNode());
2565 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
2566 ORNode, N0.getOperand(1));
2567 }
2569 // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
2570 // Only perform this optimization after type legalization and before
2571 // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
2572 // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
2573 // we don't want to undo this promotion.
2574 // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
2575 // on scalars.
2576 if ((N0.getOpcode() == ISD::BITCAST ||
2577 N0.getOpcode() == ISD::SCALAR_TO_VECTOR) &&
2578 Level == AfterLegalizeTypes) {
2579 SDValue In0 = N0.getOperand(0);
2580 SDValue In1 = N1.getOperand(0);
2581 EVT In0Ty = In0.getValueType();
2582 EVT In1Ty = In1.getValueType();
2583 SDLoc DL(N);
2584 // If both incoming values are integers, and the original types are the
2585 // same.
2586 if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
2587 SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1);
2588 SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op);
2589 AddToWorklist(Op.getNode());
2590 return BC;
2591 }
2592 }
2594 // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
2595 // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
2596 // If both shuffles use the same mask, and both shuffle within a single
2597 // vector, then it is worthwhile to move the swizzle after the operation.
2598 // The type-legalizer generates this pattern when loading illegal
2599 // vector types from memory. In many cases this allows additional shuffle
2600 // optimizations.
2601 // There are other cases where moving the shuffle after the xor/and/or
2602 // is profitable even if shuffles don't perform a swizzle.
2603 // If both shuffles use the same mask, and both shuffles have the same first
2604 // or second operand, then it might still be profitable to move the shuffle
2605 // after the xor/and/or operation.
2606 if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
2607 ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
2608 ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);
2610 assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
2611 "Inputs to shuffles are not the same type");
2613 // Check that both shuffles use the same mask. The masks are known to be of
2614 // the same length because the result vector type is the same.
2615 // Check also that shuffles have only one use to avoid introducing extra
2616 // instructions.
2617 if (SVN0->hasOneUse() && SVN1->hasOneUse() &&
2618 SVN0->getMask().equals(SVN1->getMask())) {
2619 SDValue ShOp = N0->getOperand(1);
2621 // Don't try to fold this node if it requires introducing a
2622 // build vector of all zeros that might be illegal at this stage.
2623 if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) {
2624 if (!LegalTypes)
2625 ShOp = DAG.getConstant(0, VT);
2626 else
2627 ShOp = SDValue();
2628 }
2630 // (AND (shuf (A, C), shuf (B, C)) -> shuf (AND (A, B), C)
2631 // (OR (shuf (A, C), shuf (B, C)) -> shuf (OR (A, B), C)
2632 // (XOR (shuf (A, C), shuf (B, C)) -> shuf (XOR (A, B), V_0)
2633 if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
2634 SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
2635 N0->getOperand(0), N1->getOperand(0));
2636 AddToWorklist(NewNode.getNode());
2637 return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp,
2638 &SVN0->getMask()[0]);
2639 }
2641 // Don't try to fold this node if it requires introducing a
2642 // build vector of all zeros that might be illegal at this stage.
2643 ShOp = N0->getOperand(0);
2644 if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) {
2645 if (!LegalTypes)
2646 ShOp = DAG.getConstant(0, VT);
2647 else
2648 ShOp = SDValue();
2649 }
2651 // (AND (shuf (C, A), shuf (C, B)) -> shuf (C, AND (A, B))
2652 // (OR (shuf (C, A), shuf (C, B)) -> shuf (C, OR (A, B))
2653 // (XOR (shuf (C, A), shuf (C, B)) -> shuf (V_0, XOR (A, B))
2654 if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) {
2655 SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
2656 N0->getOperand(1), N1->getOperand(1));
2657 AddToWorklist(NewNode.getNode());
2658 return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode,
2659 &SVN0->getMask()[0]);
2660 }
2661 }
2662 }
2664 return SDValue();
2665 }
2667 SDValue DAGCombiner::visitAND(SDNode *N) {
2668 SDValue N0 = N->getOperand(0);
2669 SDValue N1 = N->getOperand(1);
2670 SDValue LL, LR, RL, RR, CC0, CC1;
2671 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2672 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2673 EVT VT = N1.getValueType();
2674 unsigned BitWidth = VT.getScalarType().getSizeInBits();
2676 // fold vector ops
2677 if (VT.isVector()) {
2678 SDValue FoldedVOp = SimplifyVBinOp(N);
2679 if (FoldedVOp.getNode()) return FoldedVOp;
2681 // fold (and x, 0) -> 0, vector edition
2682 if (ISD::isBuildVectorAllZeros(N0.getNode()))
2683 // do not return N0, because undef node may exist in N0
2684 return DAG.getConstant(
2685 APInt::getNullValue(
2686 N0.getValueType().getScalarType().getSizeInBits()),
2687 N0.getValueType());
2688 if (ISD::isBuildVectorAllZeros(N1.getNode()))
2689 // do not return N1, because undef node may exist in N1
2690 return DAG.getConstant(
2691 APInt::getNullValue(
2692 N1.getValueType().getScalarType().getSizeInBits()),
2693 N1.getValueType());
2695 // fold (and x, -1) -> x, vector edition
2696 if (ISD::isBuildVectorAllOnes(N0.getNode()))
2697 return N1;
2698 if (ISD::isBuildVectorAllOnes(N1.getNode()))
2699 return N0;
2700 }
2702 // fold (and x, undef) -> 0
2703 if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
2704 return DAG.getConstant(0, VT);
2705 // fold (and c1, c2) -> c1&c2
2706 if (N0C && N1C)
2707 return DAG.FoldConstantArithmetic(ISD::AND, VT, N0C, N1C);
2708 // canonicalize constant to RHS
2709 if (N0C && !N1C)
2710 return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
2711 // fold (and x, -1) -> x
2712 if (N1C && N1C->isAllOnesValue())
2713 return N0;
2714 // if (and x, c) is known to be zero, return 0
2715 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
2716 APInt::getAllOnesValue(BitWidth)))
2717 return DAG.getConstant(0, VT);
2718 // reassociate and
2719 SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1);
2720 if (RAND.getNode())
2721 return RAND;
2722 // fold (and (or x, C), D) -> D if (C & D) == D
2723 if (N1C && N0.getOpcode() == ISD::OR)
2724 if (ConstantSDNode *ORI = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
2725 if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue())
2726 return N1;
2727 // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
2728 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
2729 SDValue N0Op0 = N0.getOperand(0);
2730 APInt Mask = ~N1C->getAPIntValue();
2731 Mask = Mask.trunc(N0Op0.getValueSizeInBits());
2732 if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
2733 SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
2734 N0.getValueType(), N0Op0);
2736 // Replace uses of the AND with uses of the Zero extend node.
2737 CombineTo(N, Zext);
2739 // We actually want to replace all uses of the any_extend with the
2740 // zero_extend, to avoid duplicating things. This will later cause this
2741 // AND to be folded.
2742 CombineTo(N0.getNode(), Zext);
2743 return SDValue(N, 0); // Return N so it doesn't get rechecked!
2744 }
2745 }
2746 // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
2747 // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
2748 // already be zero by virtue of the width of the base type of the load.
2749 //
2750 // the 'X' node here can either be nothing or an extract_vector_elt to catch
2751 // more cases.
2752 if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
2753 N0.getOperand(0).getOpcode() == ISD::LOAD) ||
2754 N0.getOpcode() == ISD::LOAD) {
2755 LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
2756 N0 : N0.getOperand(0) );
2758 // Get the constant (if applicable) the zero'th operand is being ANDed with.
2759 // This can be a pure constant or a vector splat, in which case we treat the
2760 // vector as a scalar and use the splat value.
2761 APInt Constant = APInt::getNullValue(1);
2762 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
2763 Constant = C->getAPIntValue();
2764 } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
2765 APInt SplatValue, SplatUndef;
2766 unsigned SplatBitSize;
2767 bool HasAnyUndefs;
2768 bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
2769 SplatBitSize, HasAnyUndefs);
2770 if (IsSplat) {
2771 // Undef bits can contribute to a possible optimisation if set, so
2772 // set them.
2773 SplatValue |= SplatUndef;
2775 // The splat value may be something like "0x00FFFFFF", which means 0 for
2776 // the first vector value and FF for the rest, repeating. We need a mask
2777 // that will apply equally to all members of the vector, so AND all the
2778 // lanes of the constant together.
2779 EVT VT = Vector->getValueType(0);
2780 unsigned BitWidth = VT.getVectorElementType().getSizeInBits();
2782 // If the splat value has been compressed to a bitlength lower
2783 // than the size of the vector lane, we need to re-expand it to
2784 // the lane size.
2785 if (BitWidth > SplatBitSize)
2786 for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
2787 SplatBitSize < BitWidth;
2788 SplatBitSize = SplatBitSize * 2)
2789 SplatValue |= SplatValue.shl(SplatBitSize);
2791 Constant = APInt::getAllOnesValue(BitWidth);
2792 for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
2793 Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
2794 }
2795 }
2797 // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
2798 // actually legal and isn't going to get expanded, else this is a false
2799 // optimisation.
2800 bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
2801 Load->getValueType(0),
2802 Load->getMemoryVT());
2804 // Resize the constant to the same size as the original memory access before
2805 // extension. If it is still the AllOnesValue then this AND is completely
2806 // unneeded.
2807 Constant =
2808 Constant.zextOrTrunc(Load->getMemoryVT().getScalarType().getSizeInBits());
2810 bool B;
2811 switch (Load->getExtensionType()) {
2812 default: B = false; break;
2813 case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
2814 case ISD::ZEXTLOAD:
2815 case ISD::NON_EXTLOAD: B = true; break;
2816 }
2818 if (B && Constant.isAllOnesValue()) {
2819 // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
2820 // preserve semantics once we get rid of the AND.
2821 SDValue NewLoad(Load, 0);
2822 if (Load->getExtensionType() == ISD::EXTLOAD) {
2823 NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
2824 Load->getValueType(0), SDLoc(Load),
2825 Load->getChain(), Load->getBasePtr(),
2826 Load->getOffset(), Load->getMemoryVT(),
2827 Load->getMemOperand());
2828 // Replace uses of the EXTLOAD with the new ZEXTLOAD.
2829 if (Load->getNumValues() == 3) {
2830 // PRE/POST_INC loads have 3 values.
2831 SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
2832 NewLoad.getValue(2) };
2833 CombineTo(Load, To, 3, true);
2834 } else {
2835 CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
2836 }
2837 }
2839 // Fold the AND away, taking care not to fold to the old load node if we
2840 // replaced it.
2841 CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
2843 return SDValue(N, 0); // Return N so it doesn't get rechecked!
2844 }
2845 }
2846 // fold (and (setcc x), (setcc y)) -> (setcc (and x, y))
2847 if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
2848 ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
2849 ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
2851 if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
2852 LL.getValueType().isInteger()) {
2853 // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0)
2854 if (cast<ConstantSDNode>(LR)->isNullValue() && Op1 == ISD::SETEQ) {
2855 SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
2856 LR.getValueType(), LL, RL);
2857 AddToWorklist(ORNode.getNode());
2858 return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1);
2859 }
2860 // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1)
2861 if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETEQ) {
2862 SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0),
2863 LR.getValueType(), LL, RL);
2864 AddToWorklist(ANDNode.getNode());
2865 return DAG.getSetCC(SDLoc(N), VT, ANDNode, LR, Op1);
2866 }
2867 // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1)
2868 if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETGT) {
2869 SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
2870 LR.getValueType(), LL, RL);
2871 AddToWorklist(ORNode.getNode());
2872 return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1);
2873 }
2874 }
2875 // Simplify (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2)
2876 if (LL == RL && isa<ConstantSDNode>(LR) && isa<ConstantSDNode>(RR) &&
2877 Op0 == Op1 && LL.getValueType().isInteger() &&
2878 Op0 == ISD::SETNE && ((cast<ConstantSDNode>(LR)->isNullValue() &&
2879 cast<ConstantSDNode>(RR)->isAllOnesValue()) ||
2880 (cast<ConstantSDNode>(LR)->isAllOnesValue() &&
2881 cast<ConstantSDNode>(RR)->isNullValue()))) {
2882 SDValue ADDNode = DAG.getNode(ISD::ADD, SDLoc(N0), LL.getValueType(),
2883 LL, DAG.getConstant(1, LL.getValueType()));
2884 AddToWorklist(ADDNode.getNode());
2885 return DAG.getSetCC(SDLoc(N), VT, ADDNode,
2886 DAG.getConstant(2, LL.getValueType()), ISD::SETUGE);
2887 }
2888 // canonicalize equivalent to ll == rl
2889 if (LL == RR && LR == RL) {
2890 Op1 = ISD::getSetCCSwappedOperands(Op1);
2891 std::swap(RL, RR);
2892 }
2893 if (LL == RL && LR == RR) {
2894 bool isInteger = LL.getValueType().isInteger();
2895 ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger);
2896 if (Result != ISD::SETCC_INVALID &&
2897 (!LegalOperations ||
2898 (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
2899 TLI.isOperationLegal(ISD::SETCC,
2900 getSetCCResultType(N0.getSimpleValueType())))))
2901 return DAG.getSetCC(SDLoc(N), N0.getValueType(),
2902 LL, LR, Result);
2903 }
2904 }
2906 // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
2907 if (N0.getOpcode() == N1.getOpcode()) {
2908 SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
2909 if (Tmp.getNode()) return Tmp;
2910 }
2912 // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
2913 // fold (and (sra)) -> (and (srl)) when possible.
2914 if (!VT.isVector() &&
2915 SimplifyDemandedBits(SDValue(N, 0)))
2916 return SDValue(N, 0);
2918 // fold (zext_inreg (extload x)) -> (zextload x)
2919 if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
2920 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
2921 EVT MemVT = LN0->getMemoryVT();
2922 // If we zero all the possible extended bits, then we can turn this into
2923 // a zextload if we are running before legalize or the operation is legal.
2924 unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
2925 if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
2926 BitWidth - MemVT.getScalarType().getSizeInBits())) &&
2927 ((!LegalOperations && !LN0->isVolatile()) ||
2928 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
2929 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
2930 LN0->getChain(), LN0->getBasePtr(),
2931 MemVT, LN0->getMemOperand());
2932 AddToWorklist(N);
2933 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
2934 return SDValue(N, 0); // Return N so it doesn't get rechecked!
2935 }
2936 }
2937 // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
2938 if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
2939 N0.hasOneUse()) {
2940 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
2941 EVT MemVT = LN0->getMemoryVT();
2942 // If we zero all the possible extended bits, then we can turn this into
2943 // a zextload if we are running before legalize or the operation is legal.
2944 unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
2945 if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
2946 BitWidth - MemVT.getScalarType().getSizeInBits())) &&
2947 ((!LegalOperations && !LN0->isVolatile()) ||
2948 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
2949 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
2950 LN0->getChain(), LN0->getBasePtr(),
2951 MemVT, LN0->getMemOperand());
2952 AddToWorklist(N);
2953 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
2954 return SDValue(N, 0); // Return N so it doesn't get rechecked!
2955 }
2956 }
2958 // fold (and (load x), 255) -> (zextload x, i8)
2959 // fold (and (extload x, i16), 255) -> (zextload x, i8)
2960 // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
2961 if (N1C && (N0.getOpcode() == ISD::LOAD ||
2962 (N0.getOpcode() == ISD::ANY_EXTEND &&
2963 N0.getOperand(0).getOpcode() == ISD::LOAD))) {
2964 bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND;
2965 LoadSDNode *LN0 = HasAnyExt
2966 ? cast<LoadSDNode>(N0.getOperand(0))
2967 : cast<LoadSDNode>(N0);
2968 if (LN0->getExtensionType() != ISD::SEXTLOAD &&
2969 LN0->isUnindexed() && N0.hasOneUse() && SDValue(LN0, 0).hasOneUse()) {
2970 uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits();
2971 if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())){
2972 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
2973 EVT LoadedVT = LN0->getMemoryVT();
2974 EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
2976 if (ExtVT == LoadedVT &&
2977 (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy,
2978 ExtVT))) {
2980 SDValue NewLoad =
2981 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy,
2982 LN0->getChain(), LN0->getBasePtr(), ExtVT,
2983 LN0->getMemOperand());
2984 AddToWorklist(N);
2985 CombineTo(LN0, NewLoad, NewLoad.getValue(1));
2986 return SDValue(N, 0); // Return N so it doesn't get rechecked!
2987 }
2989 // Do not change the width of a volatile load.
2990 // Do not generate loads of non-round integer types since these can
2991 // be expensive (and would be wrong if the type is not byte sized).
2992 if (!LN0->isVolatile() && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() &&
2993 (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy,
2994 ExtVT))) {
2995 EVT PtrType = LN0->getOperand(1).getValueType();
2997 unsigned Alignment = LN0->getAlignment();
2998 SDValue NewPtr = LN0->getBasePtr();
3000 // For big endian targets, we need to add an offset to the pointer
3001 // to load the correct bytes. For little endian systems, we merely
3002 // need to read fewer bytes from the same pointer.
3003 if (TLI.isBigEndian()) {
3004 unsigned LVTStoreBytes = LoadedVT.getStoreSize();
3005 unsigned EVTStoreBytes = ExtVT.getStoreSize();
3006 unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
3007 NewPtr = DAG.getNode(ISD::ADD, SDLoc(LN0), PtrType,
3008 NewPtr, DAG.getConstant(PtrOff, PtrType));
3009 Alignment = MinAlign(Alignment, PtrOff);
3010 }
3012 AddToWorklist(NewPtr.getNode());
3014 SDValue Load =
3015 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy,
3016 LN0->getChain(), NewPtr,
3017 LN0->getPointerInfo(),
3018 ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
3019 LN0->isInvariant(), Alignment, LN0->getAAInfo());
3020 AddToWorklist(N);
3021 CombineTo(LN0, Load, Load.getValue(1));
3022 return SDValue(N, 0); // Return N so it doesn't get rechecked!
3023 }
3024 }
3025 }
3026 }
3028 if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
3029 VT.getSizeInBits() <= 64) {
3030 if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
3031 APInt ADDC = ADDI->getAPIntValue();
3032 if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
3033 // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
3034 // immediate for an add, but it is legal if its top c2 bits are set,
3035 // transform the ADD so the immediate doesn't need to be materialized
3036 // in a register.
3037 if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
3038 APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
3039 SRLI->getZExtValue());
3040 if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
3041 ADDC |= Mask;
3042 if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
3043 SDValue NewAdd =
3044 DAG.getNode(ISD::ADD, SDLoc(N0), VT,
3045 N0.getOperand(0), DAG.getConstant(ADDC, VT));
3046 CombineTo(N0.getNode(), NewAdd);
3047 return SDValue(N, 0); // Return N so it doesn't get rechecked!
3048 }
3049 }
3050 }
3051 }
3052 }
3053 }
3055 // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
3056 if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
3057 SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
3058 N0.getOperand(1), false);
3059 if (BSwap.getNode())
3060 return BSwap;
3061 }
3063 return SDValue();
3064 }
3066 /// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
3067 SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
3068 bool DemandHighBits) {
3069 if (!LegalOperations)
3070 return SDValue();
3072 EVT VT = N->getValueType(0);
3073 if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
3074 return SDValue();
3075 if (!TLI.isOperationLegal(ISD::BSWAP, VT))
3076 return SDValue();
3078 // Recognize (and (shl a, 8), 0xff), (and (srl a, 8), 0xff00)
3079 bool LookPassAnd0 = false;
3080 bool LookPassAnd1 = false;
3081 if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
3082 std::swap(N0, N1);
3083 if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
3084 std::swap(N0, N1);
3085 if (N0.getOpcode() == ISD::AND) {
3086 if (!N0.getNode()->hasOneUse())
3087 return SDValue();
3088 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
3089 if (!N01C || N01C->getZExtValue() != 0xFF00)
3090 return SDValue();
3091 N0 = N0.getOperand(0);
3092 LookPassAnd0 = true;
3093 }
3095 if (N1.getOpcode() == ISD::AND) {
3096 if (!N1.getNode()->hasOneUse())
3097 return SDValue();
3098 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
3099 if (!N11C || N11C->getZExtValue() != 0xFF)
3100 return SDValue();
3101 N1 = N1.getOperand(0);
3102 LookPassAnd1 = true;
3103 }
3105 if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
3106 std::swap(N0, N1);
3107 if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
3108 return SDValue();
3109 if (!N0.getNode()->hasOneUse() ||
3110 !N1.getNode()->hasOneUse())
3111 return SDValue();
3113 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
3114 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
3115 if (!N01C || !N11C)
3116 return SDValue();
3117 if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
3118 return SDValue();
3120 // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
3121 SDValue N00 = N0->getOperand(0);
3122 if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
3123 if (!N00.getNode()->hasOneUse())
3124 return SDValue();
3125 ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
3126 if (!N001C || N001C->getZExtValue() != 0xFF)
3127 return SDValue();
3128 N00 = N00.getOperand(0);
3129 LookPassAnd0 = true;
3130 }
3132 SDValue N10 = N1->getOperand(0);
3133 if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
3134 if (!N10.getNode()->hasOneUse())
3135 return SDValue();
3136 ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
3137 if (!N101C || N101C->getZExtValue() != 0xFF00)
3138 return SDValue();
3139 N10 = N10.getOperand(0);
3140 LookPassAnd1 = true;
3141 }
3143 if (N00 != N10)
3144 return SDValue();
3146 // Make sure everything beyond the low halfword gets set to zero since the SRL
3147 // 16 will clear the top bits.
3148 unsigned OpSizeInBits = VT.getSizeInBits();
3149 if (DemandHighBits && OpSizeInBits > 16) {
3150 // If the left-shift isn't masked out then the only way this is a bswap is
3151 // if all bits beyond the low 8 are 0. In that case the entire pattern
3152 // reduces to a left shift anyway: leave it for other parts of the combiner.
3153 if (!LookPassAnd0)
3154 return SDValue();
3156 // However, if the right shift isn't masked out then it might be because
3157 // it's not needed. See if we can spot that too.
3158 if (!LookPassAnd1 &&
3159 !DAG.MaskedValueIsZero(
3160 N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
3161 return SDValue();
3162 }
3164 SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
3165 if (OpSizeInBits > 16)
3166 Res = DAG.getNode(ISD::SRL, SDLoc(N), VT, Res,
3167 DAG.getConstant(OpSizeInBits-16, getShiftAmountTy(VT)));
3168 return Res;
3169 }
3171 /// Return true if the specified node is an element that makes up a 32-bit
3172 /// packed halfword byteswap.
3173 /// ((x & 0x000000ff) << 8) |
3174 /// ((x & 0x0000ff00) >> 8) |
3175 /// ((x & 0x00ff0000) << 8) |
3176 /// ((x & 0xff000000) >> 8)
3177 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
3178 if (!N.getNode()->hasOneUse())
3179 return false;
3181 unsigned Opc = N.getOpcode();
3182 if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
3183 return false;
3185 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
3186 if (!N1C)
3187 return false;
3189 unsigned Num;
3190 switch (N1C->getZExtValue()) {
3191 default:
3192 return false;
3193 case 0xFF: Num = 0; break;
3194 case 0xFF00: Num = 1; break;
3195 case 0xFF0000: Num = 2; break;
3196 case 0xFF000000: Num = 3; break;
3197 }
3199 // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
3200 SDValue N0 = N.getOperand(0);
3201 if (Opc == ISD::AND) {
3202 if (Num == 0 || Num == 2) {
3203 // (x >> 8) & 0xff
3204 // (x >> 8) & 0xff0000
3205 if (N0.getOpcode() != ISD::SRL)
3206 return false;
3207 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
3208 if (!C || C->getZExtValue() != 8)
3209 return false;
3210 } else {
3211 // (x << 8) & 0xff00
3212 // (x << 8) & 0xff000000
3213 if (N0.getOpcode() != ISD::SHL)
3214 return false;
3215 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
3216 if (!C || C->getZExtValue() != 8)
3217 return false;
3218 }
3219 } else if (Opc == ISD::SHL) {
3220 // (x & 0xff) << 8
3221 // (x & 0xff0000) << 8
3222 if (Num != 0 && Num != 2)
3223 return false;
3224 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
3225 if (!C || C->getZExtValue() != 8)
3226 return false;
3227 } else { // Opc == ISD::SRL
3228 // (x & 0xff00) >> 8
3229 // (x & 0xff000000) >> 8
3230 if (Num != 1 && Num != 3)
3231 return false;
3232 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
3233 if (!C || C->getZExtValue() != 8)
3234 return false;
3235 }
3237 if (Parts[Num])
3238 return false;
3240 Parts[Num] = N0.getOperand(0).getNode();
3241 return true;
3242 }
3244 /// Match a 32-bit packed halfword bswap. That is
3245 /// ((x & 0x000000ff) << 8) |
3246 /// ((x & 0x0000ff00) >> 8) |
3247 /// ((x & 0x00ff0000) << 8) |
3248 /// ((x & 0xff000000) >> 8)
3249 /// => (rotl (bswap x), 16)
3250 SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
3251 if (!LegalOperations)
3252 return SDValue();
3254 EVT VT = N->getValueType(0);
3255 if (VT != MVT::i32)
3256 return SDValue();
3257 if (!TLI.isOperationLegal(ISD::BSWAP, VT))
3258 return SDValue();
3260 // Look for either
3261 // (or (or (and), (and)), (or (and), (and)))
3262 // (or (or (or (and), (and)), (and)), (and))
3263 if (N0.getOpcode() != ISD::OR)
3264 return SDValue();
3265 SDValue N00 = N0.getOperand(0);
3266 SDValue N01 = N0.getOperand(1);
3267 SDNode *Parts[4] = {};
3269 if (N1.getOpcode() == ISD::OR &&
3270 N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
3271 // (or (or (and), (and)), (or (and), (and)))
3272 SDValue N000 = N00.getOperand(0);
3273 if (!isBSwapHWordElement(N000, Parts))
3274 return SDValue();
3276 SDValue N001 = N00.getOperand(1);
3277 if (!isBSwapHWordElement(N001, Parts))
3278 return SDValue();
3279 SDValue N010 = N01.getOperand(0);
3280 if (!isBSwapHWordElement(N010, Parts))
3281 return SDValue();
3282 SDValue N011 = N01.getOperand(1);
3283 if (!isBSwapHWordElement(N011, Parts))
3284 return SDValue();
3285 } else {
3286 // (or (or (or (and), (and)), (and)), (and))
3287 if (!isBSwapHWordElement(N1, Parts))
3288 return SDValue();
3289 if (!isBSwapHWordElement(N01, Parts))
3290 return SDValue();
3291 if (N00.getOpcode() != ISD::OR)
3292 return SDValue();
3293 SDValue N000 = N00.getOperand(0);
3294 if (!isBSwapHWordElement(N000, Parts))
3295 return SDValue();
3296 SDValue N001 = N00.getOperand(1);
3297 if (!isBSwapHWordElement(N001, Parts))
3298 return SDValue();
3299 }
3301 // Make sure the parts are all coming from the same node.
3302 if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
3303 return SDValue();
3305 SDValue BSwap = DAG.getNode(ISD::BSWAP, SDLoc(N), VT,
3306 SDValue(Parts[0],0));
3308 // Result of the bswap should be rotated by 16. If it's not legal, then
3309 // do (x << 16) | (x >> 16).
3310 SDValue ShAmt = DAG.getConstant(16, getShiftAmountTy(VT));
3311 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
3312 return DAG.getNode(ISD::ROTL, SDLoc(N), VT, BSwap, ShAmt);
3313 if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
3314 return DAG.getNode(ISD::ROTR, SDLoc(N), VT, BSwap, ShAmt);
3315 return DAG.getNode(ISD::OR, SDLoc(N), VT,
3316 DAG.getNode(ISD::SHL, SDLoc(N), VT, BSwap, ShAmt),
3317 DAG.getNode(ISD::SRL, SDLoc(N), VT, BSwap, ShAmt));
3318 }
3320 SDValue DAGCombiner::visitOR(SDNode *N) {
3321 SDValue N0 = N->getOperand(0);
3322 SDValue N1 = N->getOperand(1);
3323 SDValue LL, LR, RL, RR, CC0, CC1;
3324 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3325 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3326 EVT VT = N1.getValueType();
3328 // fold vector ops
3329 if (VT.isVector()) {
3330 SDValue FoldedVOp = SimplifyVBinOp(N);
3331 if (FoldedVOp.getNode()) return FoldedVOp;
3333 // fold (or x, 0) -> x, vector edition
3334 if (ISD::isBuildVectorAllZeros(N0.getNode()))
3335 return N1;
3336 if (ISD::isBuildVectorAllZeros(N1.getNode()))
3337 return N0;
3339 // fold (or x, -1) -> -1, vector edition
3340 if (ISD::isBuildVectorAllOnes(N0.getNode()))
3341 // do not return N0, because undef node may exist in N0
3342 return DAG.getConstant(
3343 APInt::getAllOnesValue(
3344 N0.getValueType().getScalarType().getSizeInBits()),
3345 N0.getValueType());
3346 if (ISD::isBuildVectorAllOnes(N1.getNode()))
3347 // do not return N1, because undef node may exist in N1
3348 return DAG.getConstant(
3349 APInt::getAllOnesValue(
3350 N1.getValueType().getScalarType().getSizeInBits()),
3351 N1.getValueType());
3353 // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask1)
3354 // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf B, A, Mask2)
3355 // Do this only if the resulting shuffle is legal.
3356 if (isa<ShuffleVectorSDNode>(N0) &&
3357 isa<ShuffleVectorSDNode>(N1) &&
3358 // Avoid folding a node with illegal type.
3359 TLI.isTypeLegal(VT) &&
3360 N0->getOperand(1) == N1->getOperand(1) &&
3361 ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode())) {
3362 bool CanFold = true;
3363 unsigned NumElts = VT.getVectorNumElements();
3364 const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
3365 const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
3366 // We construct two shuffle masks:
3367 // - Mask1 is a shuffle mask for a shuffle with N0 as the first operand
3368 // and N1 as the second operand.
3369 // - Mask2 is a shuffle mask for a shuffle with N1 as the first operand
3370 // and N0 as the second operand.
3371 // We do this because OR is commutable and therefore there might be
3372 // two ways to fold this node into a shuffle.
3373 SmallVector<int,4> Mask1;
3374 SmallVector<int,4> Mask2;
3376 for (unsigned i = 0; i != NumElts && CanFold; ++i) {
3377 int M0 = SV0->getMaskElt(i);
3378 int M1 = SV1->getMaskElt(i);
3380 // Both shuffle indexes are undef. Propagate Undef.
3381 if (M0 < 0 && M1 < 0) {
3382 Mask1.push_back(M0);
3383 Mask2.push_back(M0);
3384 continue;
3385 }
3387 if (M0 < 0 || M1 < 0 ||
3388 (M0 < (int)NumElts && M1 < (int)NumElts) ||
3389 (M0 >= (int)NumElts && M1 >= (int)NumElts)) {
3390 CanFold = false;
3391 break;
3392 }
3394 Mask1.push_back(M0 < (int)NumElts ? M0 : M1 + NumElts);
3395 Mask2.push_back(M1 < (int)NumElts ? M1 : M0 + NumElts);
3396 }
3398 if (CanFold) {
3399 // Fold this sequence only if the resulting shuffle is 'legal'.
3400 if (TLI.isShuffleMaskLegal(Mask1, VT))
3401 return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0),
3402 N1->getOperand(0), &Mask1[0]);
3403 if (TLI.isShuffleMaskLegal(Mask2, VT))
3404 return DAG.getVectorShuffle(VT, SDLoc(N), N1->getOperand(0),
3405 N0->getOperand(0), &Mask2[0]);
3406 }
3407 }
3408 }
3410 // fold (or x, undef) -> -1
3411 if (!LegalOperations &&
3412 (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)) {
3413 EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
3414 return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT);
3415 }
3416 // fold (or c1, c2) -> c1|c2
3417 if (N0C && N1C)
3418 return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C);
3419 // canonicalize constant to RHS
3420 if (N0C && !N1C)
3421 return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
3422 // fold (or x, 0) -> x
3423 if (N1C && N1C->isNullValue())
3424 return N0;
3425 // fold (or x, -1) -> -1
3426 if (N1C && N1C->isAllOnesValue())
3427 return N1;
3428 // fold (or x, c) -> c iff (x & ~c) == 0
3429 if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
3430 return N1;
3432 // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
3433 SDValue BSwap = MatchBSwapHWord(N, N0, N1);
3434 if (BSwap.getNode())
3435 return BSwap;
3436 BSwap = MatchBSwapHWordLow(N, N0, N1);
3437 if (BSwap.getNode())
3438 return BSwap;
3440 // reassociate or
3441 SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1);
3442 if (ROR.getNode())
3443 return ROR;
3444 // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
3445 // iff (c1 & c2) == 0.
3446 if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
3447 isa<ConstantSDNode>(N0.getOperand(1))) {
3448 ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1));
3449 if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0) {
3450 if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1))
3451 return DAG.getNode(
3452 ISD::AND, SDLoc(N), VT,
3453 DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1), COR);
3454 return SDValue();
3455 }
3456 }
3457 // fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
3458 if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
3459 ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
3460 ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
3462 if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
3463 LL.getValueType().isInteger()) {
3464 // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0)
3465 // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0)
3466 if (cast<ConstantSDNode>(LR)->isNullValue() &&
3467 (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) {
3468 SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR),
3469 LR.getValueType(), LL, RL);
3470 AddToWorklist(ORNode.getNode());
3471 return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1);
3472 }
3473 // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1)
3474 // fold (or (setgt X, -1), (setgt Y -1)) -> (setgt (and X, Y), -1)
3475 if (cast<ConstantSDNode>(LR)->isAllOnesValue() &&
3476 (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) {
3477 SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR),
3478 LR.getValueType(), LL, RL);
3479 AddToWorklist(ANDNode.getNode());
3480 return DAG.getSetCC(SDLoc(N), VT, ANDNode, LR, Op1);
3481 }
3482 }
3483 // canonicalize equivalent to ll == rl
3484 if (LL == RR && LR == RL) {
3485 Op1 = ISD::getSetCCSwappedOperands(Op1);
3486 std::swap(RL, RR);
3487 }
3488 if (LL == RL && LR == RR) {
3489 bool isInteger = LL.getValueType().isInteger();
3490 ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger);
3491 if (Result != ISD::SETCC_INVALID &&
3492 (!LegalOperations ||
3493 (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
3494 TLI.isOperationLegal(ISD::SETCC,
3495 getSetCCResultType(N0.getValueType())))))
3496 return DAG.getSetCC(SDLoc(N), N0.getValueType(),
3497 LL, LR, Result);
3498 }
3499 }
3501 // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
3502 if (N0.getOpcode() == N1.getOpcode()) {
3503 SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
3504 if (Tmp.getNode()) return Tmp;
3505 }
3507 // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
3508 if (N0.getOpcode() == ISD::AND &&
3509 N1.getOpcode() == ISD::AND &&
3510 N0.getOperand(1).getOpcode() == ISD::Constant &&
3511 N1.getOperand(1).getOpcode() == ISD::Constant &&
3512 // Don't increase # computations.
3513 (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
3514 // We can only do this xform if we know that bits from X that are set in C2
3515 // but not in C1 are already zero. Likewise for Y.
3516 const APInt &LHSMask =
3517 cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
3518 const APInt &RHSMask =
3519 cast<ConstantSDNode>(N1.getOperand(1))->getAPIntValue();
3521 if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
3522 DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
3523 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
3524 N0.getOperand(0), N1.getOperand(0));
3525 return DAG.getNode(ISD::AND, SDLoc(N), VT, X,
3526 DAG.getConstant(LHSMask | RHSMask, VT));
3527 }
3528 }
3530 // (or (and X, M), (and X, N)) -> (and X, (or M, N))
3531 if (N0.getOpcode() == ISD::AND &&
3532 N1.getOpcode() == ISD::AND &&
3533 N0.getOperand(0) == N1.getOperand(0) &&
3534 // Don't increase # computations.
3535 (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
3536 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
3537 N0.getOperand(1), N1.getOperand(1));
3538 return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0), X);
3539 }
3541 // See if this is some rotate idiom.
3542 if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
3543 return SDValue(Rot, 0);
3545 // Simplify the operands using demanded-bits information.
3546 if (!VT.isVector() &&
3547 SimplifyDemandedBits(SDValue(N, 0)))
3548 return SDValue(N, 0);
3550 return SDValue();
3551 }
3553 /// Match "(X shl/srl V1) & V2" where V2 may not be present.
3554 static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
3555 if (Op.getOpcode() == ISD::AND) {
3556 if (isa<ConstantSDNode>(Op.getOperand(1))) {
3557 Mask = Op.getOperand(1);
3558 Op = Op.getOperand(0);
3559 } else {
3560 return false;
3561 }
3562 }
3564 if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
3565 Shift = Op;
3566 return true;
3567 }
3569 return false;
3570 }
3572 // Return true if we can prove that, whenever Neg and Pos are both in the
3573 // range [0, OpSize), Neg == (Pos == 0 ? 0 : OpSize - Pos). This means that
3574 // for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
3575 //
3576 // (or (shift1 X, Neg), (shift2 X, Pos))
3577 //
3578 // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
3579 // in direction shift1 by Neg. The range [0, OpSize) means that we only need
3580 // to consider shift amounts with defined behavior.
3581 static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned OpSize) {
3582 // If OpSize is a power of 2 then:
3583 //
3584 // (a) (Pos == 0 ? 0 : OpSize - Pos) == (OpSize - Pos) & (OpSize - 1)
3585 // (b) Neg == Neg & (OpSize - 1) whenever Neg is in [0, OpSize).
3586 //
3587 // So if OpSize is a power of 2 and Neg is (and Neg', OpSize-1), we check
3588 // for the stronger condition:
3589 //
3590 // Neg & (OpSize - 1) == (OpSize - Pos) & (OpSize - 1) [A]
3591 //
3592 // for all Neg and Pos. Since Neg & (OpSize - 1) == Neg' & (OpSize - 1)
3593 // we can just replace Neg with Neg' for the rest of the function.
3594 //
3595 // In other cases we check for the even stronger condition:
3596 //
3597 // Neg == OpSize - Pos [B]
3598 //
3599 // for all Neg and Pos. Note that the (or ...) then invokes undefined
3600 // behavior if Pos == 0 (and consequently Neg == OpSize).
3601 //
3602 // We could actually use [A] whenever OpSize is a power of 2, but the
3603 // only extra cases that it would match are those uninteresting ones
3604 // where Neg and Pos are never in range at the same time. E.g. for
3605 // OpSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
3606 // as well as (sub 32, Pos), but:
3607 //
3608 // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
3609 //
3610 // always invokes undefined behavior for 32-bit X.
3611 //
3612 // Below, Mask == OpSize - 1 when using [A] and is all-ones otherwise.
3613 unsigned MaskLoBits = 0;
3614 if (Neg.getOpcode() == ISD::AND &&
3615 isPowerOf2_64(OpSize) &&
3616 Neg.getOperand(1).getOpcode() == ISD::Constant &&
3617 cast<ConstantSDNode>(Neg.getOperand(1))->getAPIntValue() == OpSize - 1) {
3618 Neg = Neg.getOperand(0);
3619 MaskLoBits = Log2_64(OpSize);
3620 }
3622 // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
3623 if (Neg.getOpcode() != ISD::SUB)
3624 return 0;
3625 ConstantSDNode *NegC = dyn_cast<ConstantSDNode>(Neg.getOperand(0));
3626 if (!NegC)
3627 return 0;
3628 SDValue NegOp1 = Neg.getOperand(1);
3630 // On the RHS of [A], if Pos is Pos' & (OpSize - 1), just replace Pos with
3631 // Pos'. The truncation is redundant for the purpose of the equality.
3632 if (MaskLoBits &&
3633 Pos.getOpcode() == ISD::AND &&
3634 Pos.getOperand(1).getOpcode() == ISD::Constant &&
3635 cast<ConstantSDNode>(Pos.getOperand(1))->getAPIntValue() == OpSize - 1)
3636 Pos = Pos.getOperand(0);
3638 // The condition we need is now:
3639 //
3640 // (NegC - NegOp1) & Mask == (OpSize - Pos) & Mask
3641 //
3642 // If NegOp1 == Pos then we need:
3643 //
3644 // OpSize & Mask == NegC & Mask
3645 //
3646 // (because "x & Mask" is a truncation and distributes through subtraction).
3647 APInt Width;
3648 if (Pos == NegOp1)
3649 Width = NegC->getAPIntValue();
3650 // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
3651 // Then the condition we want to prove becomes:
3652 //
3653 // (NegC - NegOp1) & Mask == (OpSize - (NegOp1 + PosC)) & Mask
3654 //
3655 // which, again because "x & Mask" is a truncation, becomes:
3656 //
3657 // NegC & Mask == (OpSize - PosC) & Mask
3658 // OpSize & Mask == (NegC + PosC) & Mask
3659 else if (Pos.getOpcode() == ISD::ADD &&
3660 Pos.getOperand(0) == NegOp1 &&
3661 Pos.getOperand(1).getOpcode() == ISD::Constant)
3662 Width = (cast<ConstantSDNode>(Pos.getOperand(1))->getAPIntValue() +
3663 NegC->getAPIntValue());
3664 else
3665 return false;
3667 // Now we just need to check that OpSize & Mask == Width & Mask.
3668 if (MaskLoBits)
3669 // Opsize & Mask is 0 since Mask is Opsize - 1.
3670 return Width.getLoBits(MaskLoBits) == 0;
3671 return Width == OpSize;
3672 }
3674 // A subroutine of MatchRotate used once we have found an OR of two opposite
3675 // shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces
3676 // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
3677 // former being preferred if supported. InnerPos and InnerNeg are Pos and
3678 // Neg with outer conversions stripped away.
3679 SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
3680 SDValue Neg, SDValue InnerPos,
3681 SDValue InnerNeg, unsigned PosOpcode,
3682 unsigned NegOpcode, SDLoc DL) {
3683 // fold (or (shl x, (*ext y)),
3684 // (srl x, (*ext (sub 32, y)))) ->
3685 // (rotl x, y) or (rotr x, (sub 32, y))
3686 //
3687 // fold (or (shl x, (*ext (sub 32, y))),
3688 // (srl x, (*ext y))) ->
3689 // (rotr x, y) or (rotl x, (sub 32, y))
3690 EVT VT = Shifted.getValueType();
3691 if (matchRotateSub(InnerPos, InnerNeg, VT.getSizeInBits())) {
3692 bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
3693 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
3694 HasPos ? Pos : Neg).getNode();
3695 }
3697 return nullptr;
3698 }
3700 // MatchRotate - Handle an 'or' of two operands. If this is one of the many
3701 // idioms for rotate, and if the target supports rotation instructions, generate
3702 // a rot[lr].
3703 SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
3704 // Must be a legal type. Expanded 'n promoted things won't work with rotates.
3705 EVT VT = LHS.getValueType();
3706 if (!TLI.isTypeLegal(VT)) return nullptr;
3708 // The target must have at least one rotate flavor.
3709 bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT);
3710 bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT);
3711 if (!HasROTL && !HasROTR) return nullptr;
3713 // Match "(X shl/srl V1) & V2" where V2 may not be present.
3714 SDValue LHSShift; // The shift.
3715 SDValue LHSMask; // AND value if any.
3716 if (!MatchRotateHalf(LHS, LHSShift, LHSMask))
3717 return nullptr; // Not part of a rotate.
3719 SDValue RHSShift; // The shift.
3720 SDValue RHSMask; // AND value if any.
3721 if (!MatchRotateHalf(RHS, RHSShift, RHSMask))
3722 return nullptr; // Not part of a rotate.
3724 if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
3725 return nullptr; // Not shifting the same value.
3727 if (LHSShift.getOpcode() == RHSShift.getOpcode())
3728 return nullptr; // Shifts must disagree.
3730 // Canonicalize shl to left side in a shl/srl pair.
3731 if (RHSShift.getOpcode() == ISD::SHL) {
3732 std::swap(LHS, RHS);
3733 std::swap(LHSShift, RHSShift);
3734 std::swap(LHSMask , RHSMask );
3735 }
3737 unsigned OpSizeInBits = VT.getSizeInBits();
3738 SDValue LHSShiftArg = LHSShift.getOperand(0);
3739 SDValue LHSShiftAmt = LHSShift.getOperand(1);
3740 SDValue RHSShiftArg = RHSShift.getOperand(0);
3741 SDValue RHSShiftAmt = RHSShift.getOperand(1);
3743 // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
3744 // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
3745 if (LHSShiftAmt.getOpcode() == ISD::Constant &&
3746 RHSShiftAmt.getOpcode() == ISD::Constant) {
3747 uint64_t LShVal = cast<ConstantSDNode>(LHSShiftAmt)->getZExtValue();
3748 uint64_t RShVal = cast<ConstantSDNode>(RHSShiftAmt)->getZExtValue();
3749 if ((LShVal + RShVal) != OpSizeInBits)
3750 return nullptr;
3752 SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
3753 LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);
3755 // If there is an AND of either shifted operand, apply it to the result.
3756 if (LHSMask.getNode() || RHSMask.getNode()) {
3757 APInt Mask = APInt::getAllOnesValue(OpSizeInBits);
3759 if (LHSMask.getNode()) {
3760 APInt RHSBits = APInt::getLowBitsSet(OpSizeInBits, LShVal);
3761 Mask &= cast<ConstantSDNode>(LHSMask)->getAPIntValue() | RHSBits;
3762 }
3763 if (RHSMask.getNode()) {
3764 APInt LHSBits = APInt::getHighBitsSet(OpSizeInBits, RShVal);
3765 Mask &= cast<ConstantSDNode>(RHSMask)->getAPIntValue() | LHSBits;
3766 }
3768 Rot = DAG.getNode(ISD::AND, DL, VT, Rot, DAG.getConstant(Mask, VT));
3769 }
3771 return Rot.getNode();
3772 }
3774 // If there is a mask here, and we have a variable shift, we can't be sure
3775 // that we're masking out the right stuff.
3776 if (LHSMask.getNode() || RHSMask.getNode())
3777 return nullptr;
3779 // If the shift amount is sign/zext/any-extended just peel it off.
3780 SDValue LExtOp0 = LHSShiftAmt;
3781 SDValue RExtOp0 = RHSShiftAmt;
3782 if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
3783 LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
3784 LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
3785 LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
3786 (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
3787 RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
3788 RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
3789 RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
3790 LExtOp0 = LHSShiftAmt.getOperand(0);
3791 RExtOp0 = RHSShiftAmt.getOperand(0);
3792 }
3794 SDNode *TryL = MatchRotateP