//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "X86ISelLowering.h"
#include "Utils/X86ShuffleDecode.h"
#include "X86CallingConv.h"
#include "X86InstrBuilder.h"
#include "X86MachineFunctionInfo.h"
#include "X86TargetMachine.h"
#include "X86TargetObjectFile.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/VariadicFunction.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
#include "X86IntrinsicsInfo.h"
#include <bitset>
#include <numeric>
#include <cctype>
using namespace llvm;

#define DEBUG_TYPE "x86-isel"

STATISTIC(NumTailCalls, "Number of tail calls");

static cl::opt<bool> ExperimentalVectorWideningLegalization(
    "x86-experimental-vector-widening-legalization", cl::init(false),
    cl::desc("Enable an experimental vector type legalization through widening "
             "rather than promotion."),
    cl::Hidden);

static cl::opt<bool> ExperimentalVectorShuffleLowering(
    "x86-experimental-vector-shuffle-lowering", cl::init(true),
    cl::desc("Enable an experimental vector shuffle lowering code path."),
    cl::Hidden);

static cl::opt<bool> ExperimentalVectorShuffleLegality(
    "x86-experimental-vector-shuffle-legality", cl::init(false),
    cl::desc("Enable experimental shuffle legality based on the experimental "
             "shuffle lowering. Should only be used with the experimental "
             "shuffle lowering."),
    cl::Hidden);

static cl::opt<int> ReciprocalEstimateRefinementSteps(
    "x86-recip-refinement-steps", cl::init(1),
    cl::desc("Specify the number of Newton-Raphson iterations applied to the "
             "result of the hardware reciprocal estimate instruction."),
    cl::NotHidden);

// Forward declarations.
static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1,
                       SDValue V2);

static SDValue ExtractSubVector(SDValue Vec, unsigned IdxVal,
                                SelectionDAG &DAG, SDLoc dl,
                                unsigned vectorWidth) {
  assert((vectorWidth == 128 || vectorWidth == 256) &&
         "Unsupported vector width");
  EVT VT = Vec.getValueType();
  EVT ElVT = VT.getVectorElementType();
  unsigned Factor = VT.getSizeInBits()/vectorWidth;
  EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
                                  VT.getVectorNumElements()/Factor);

  // Extract from UNDEF is UNDEF.
  if (Vec.getOpcode() == ISD::UNDEF)
    return DAG.getUNDEF(ResultVT);

  // Extract the relevant vectorWidth bits. Generate an EXTRACT_SUBVECTOR
  unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits();

  // This is the index of the first element of the vectorWidth-bit chunk
  // we want.
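  // For example, with 32-bit elements and vectorWidth == 128, ElemsPerChunk
  // is 4, so IdxVal values 0-3 normalize to 0 and values 4-7 normalize to 4;
  // i.e. IdxVal is rounded down to the start of its 128-bit chunk.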
  unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / vectorWidth)
                               * ElemsPerChunk);

  // If the input is a buildvector just emit a smaller one.
  if (Vec.getOpcode() == ISD::BUILD_VECTOR)
    return DAG.getNode(ISD::BUILD_VECTOR, dl, ResultVT,
                       makeArrayRef(Vec->op_begin() + NormalizedIdxVal,
                                    ElemsPerChunk));

  SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal);
  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx);
}

/// Generate a DAG to grab 128-bits from a vector > 128 bits. This
/// sets things up to match to an AVX VEXTRACTF128 / VEXTRACTI128
/// or AVX-512 VEXTRACTF32x4 / VEXTRACTI32x4
/// instructions or a simple subregister reference. Idx is an index in the
/// 128 bits we want. It need not be aligned to a 128-bit boundary. That makes
/// lowering EXTRACT_VECTOR_ELT operations easier.
static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal,
                                   SelectionDAG &DAG, SDLoc dl) {
  assert((Vec.getValueType().is256BitVector() ||
          Vec.getValueType().is512BitVector()) && "Unexpected vector size!");
  return ExtractSubVector(Vec, IdxVal, DAG, dl, 128);
}

/// Generate a DAG to grab 256-bits from a 512-bit vector.
static SDValue Extract256BitVector(SDValue Vec, unsigned IdxVal,
                                   SelectionDAG &DAG, SDLoc dl) {
  assert(Vec.getValueType().is512BitVector() && "Unexpected vector size!");
  return ExtractSubVector(Vec, IdxVal, DAG, dl, 256);
}

static SDValue InsertSubVector(SDValue Result, SDValue Vec,
                               unsigned IdxVal, SelectionDAG &DAG,
                               SDLoc dl, unsigned vectorWidth) {
  assert((vectorWidth == 128 || vectorWidth == 256) &&
         "Unsupported vector width");
  // Inserting UNDEF is Result
  if (Vec.getOpcode() == ISD::UNDEF)
    return Result;
  EVT VT = Vec.getValueType();
  EVT ElVT = VT.getVectorElementType();
  EVT ResultVT = Result.getValueType();

  // Insert the relevant vectorWidth bits.
  unsigned ElemsPerChunk = vectorWidth/ElVT.getSizeInBits();

  // This is the index of the first element of the vectorWidth-bit chunk
  // we want.
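  // As in ExtractSubVector above, IdxVal is rounded down to the first element
  // of the vectorWidth-bit chunk that contains it.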
  unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits())/vectorWidth)
                               * ElemsPerChunk);

  SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal);
  return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, VecIdx);
}

/// Generate a DAG to put 128-bits into a vector > 128 bits. This
/// sets things up to match to an AVX VINSERTF128/VINSERTI128 or
/// AVX-512 VINSERTF32x4/VINSERTI32x4 instructions or a
/// simple superregister reference. Idx is an index in the 128 bits
/// we want. It need not be aligned to a 128-bit boundary. That makes
/// lowering INSERT_VECTOR_ELT operations easier.
static SDValue Insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
                                  SelectionDAG &DAG, SDLoc dl) {
  assert(Vec.getValueType().is128BitVector() && "Unexpected vector size!");
  return InsertSubVector(Result, Vec, IdxVal, DAG, dl, 128);
}

static SDValue Insert256BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
                                  SelectionDAG &DAG, SDLoc dl) {
  assert(Vec.getValueType().is256BitVector() && "Unexpected vector size!");
  return InsertSubVector(Result, Vec, IdxVal, DAG, dl, 256);
}

/// Concat two 128-bit vectors into a 256 bit vector using VINSERTF128
/// instructions. This is used because creating CONCAT_VECTOR nodes of
/// BUILD_VECTORS returns a larger BUILD_VECTOR while we're trying to lower
/// large BUILD_VECTORS.
static SDValue Concat128BitVectors(SDValue V1, SDValue V2, EVT VT,
                                   unsigned NumElems, SelectionDAG &DAG,
                                   SDLoc dl) {
  SDValue V = Insert128BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl);
  return Insert128BitVector(V, V2, NumElems/2, DAG, dl);
}

static SDValue Concat256BitVectors(SDValue V1, SDValue V2, EVT VT,
                                   unsigned NumElems, SelectionDAG &DAG,
                                   SDLoc dl) {
  SDValue V = Insert256BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl);
  return Insert256BitVector(V, V2, NumElems/2, DAG, dl);
}

// FIXME: This should stop caching the target machine as soon as
// we can remove resetOperationActions et al.
X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM)
    : TargetLowering(TM) {
  Subtarget = &TM.getSubtarget<X86Subtarget>();
  X86ScalarSSEf64 = Subtarget->hasSSE2();
  X86ScalarSSEf32 = Subtarget->hasSSE1();
  TD = getDataLayout();

  resetOperationActions();
}

void X86TargetLowering::resetOperationActions() {
  const TargetMachine &TM = getTargetMachine();
  static bool FirstTimeThrough = true;

  // If none of the target options have changed, then we don't need to reset the
  // operation actions.
  if (!FirstTimeThrough && TO == TM.Options) return;

  if (!FirstTimeThrough) {
    // Reinitialize the actions.
    initActions();
    FirstTimeThrough = false;
  }

  TO = TM.Options;

  // Set up the TargetLowering object.
  static const MVT IntVTs[] = { MVT::i8, MVT::i16, MVT::i32, MVT::i64 };

  // X86 is weird. It always uses i8 for shift amounts and setcc results.
  setBooleanContents(ZeroOrOneBooleanContent);
  // X86-SSE is even stranger. It uses -1 or 0 for vector masks.
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

  // For 64-bit, since we have so many registers, use the ILP scheduler.
  // For 32-bit, use the register pressure specific scheduling.
  // For Atom, always use ILP scheduling.
  if (Subtarget->isAtom())
    setSchedulingPreference(Sched::ILP);
  else if (Subtarget->is64Bit())
    setSchedulingPreference(Sched::ILP);
  else
    setSchedulingPreference(Sched::RegPressure);
  const X86RegisterInfo *RegInfo =
      TM.getSubtarget<X86Subtarget>().getRegisterInfo();
  setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());

  // Bypass expensive divides on Atom when compiling with O2.
  if (TM.getOptLevel() >= CodeGenOpt::Default) {
    if (Subtarget->hasSlowDivide32())
      addBypassSlowDiv(32, 8);
    if (Subtarget->hasSlowDivide64() && Subtarget->is64Bit())
      addBypassSlowDiv(64, 16);
  }

  if (Subtarget->isTargetKnownWindowsMSVC()) {
    // Setup Windows compiler runtime calls.
    setLibcallName(RTLIB::SDIV_I64, "_alldiv");
    setLibcallName(RTLIB::UDIV_I64, "_aulldiv");
    setLibcallName(RTLIB::SREM_I64, "_allrem");
    setLibcallName(RTLIB::UREM_I64, "_aullrem");
    setLibcallName(RTLIB::MUL_I64, "_allmul");
    setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall);
    setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall);
    setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall);
    setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall);
    setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall);

    // The _ftol2 runtime function has an unusual calling conv, which
    // is modeled by a special pseudo-instruction.
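    // (The value to convert is passed on the x87 stack in ST(0) and the
    // result comes back in EDX:EAX.)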
    setLibcallName(RTLIB::FPTOUINT_F64_I64, nullptr);
    setLibcallName(RTLIB::FPTOUINT_F32_I64, nullptr);
    setLibcallName(RTLIB::FPTOUINT_F64_I32, nullptr);
    setLibcallName(RTLIB::FPTOUINT_F32_I32, nullptr);
  }

  if (Subtarget->isTargetDarwin()) {
    // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
    setUseUnderscoreSetJmp(false);
    setUseUnderscoreLongJmp(false);
  } else if (Subtarget->isTargetWindowsGNU()) {
    // MS runtime is weird: it exports _setjmp, but longjmp!
    setUseUnderscoreSetJmp(true);
    setUseUnderscoreLongJmp(false);
  } else {
    setUseUnderscoreSetJmp(true);
    setUseUnderscoreLongJmp(true);
  }

  // Set up the register classes.
  addRegisterClass(MVT::i8, &X86::GR8RegClass);
  addRegisterClass(MVT::i16, &X86::GR16RegClass);
  addRegisterClass(MVT::i32, &X86::GR32RegClass);
  if (Subtarget->is64Bit())
    addRegisterClass(MVT::i64, &X86::GR64RegClass);

  for (MVT VT : MVT::integer_valuetypes())
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);

  // We don't accept any truncstore of integer registers.
  setTruncStoreAction(MVT::i64, MVT::i32, Expand);
  setTruncStoreAction(MVT::i64, MVT::i16, Expand);
  setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
  setTruncStoreAction(MVT::i32, MVT::i16, Expand);
  setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
  setTruncStoreAction(MVT::i16, MVT::i8, Expand);

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // SETOEQ and SETUNE require checking two conditions.
  setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOEQ, MVT::f80, Expand);
  setCondCodeAction(ISD::SETUNE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUNE, MVT::f80, Expand);

  // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
  // operation.
  setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote);
  setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote);
  setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote);

  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
    setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
  } else if (!TM.Options.UseSoftFloat) {
    // We have an algorithm for SSE2->double, and we turn this into a
    // 64-bit FILD followed by conditional FADD for other targets.
    setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
    // We have an algorithm for SSE2, and we turn this into a 64-bit
    // FILD for other targets.
    setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
  }

  // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote);
  setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote);

  if (!TM.Options.UseSoftFloat) {
    // SSE has no i16 to fp conversion, only i32
    if (X86ScalarSSEf32) {
      setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
      // f32 and f64 cases are Legal, f80 case is not
      setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
    } else {
      setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Custom);
      setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
    }
  } else {
    setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
    setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Promote);
  }

  // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
  // are Legal, f80 is custom lowered.
  setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom);
  setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom);

  // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote);
  setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote);

  if (X86ScalarSSEf32) {
    setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
    // f32 and f64 cases are Legal, f80 case is not
    setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
  } else {
    setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom);
    setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
  }

  // Handle FP_TO_UINT by promoting the destination to a larger signed
  // conversion.
  setOperationAction(ISD::FP_TO_UINT , MVT::i1 , Promote);
  setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote);
  setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote);

  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
    setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
  } else if (!TM.Options.UseSoftFloat) {
    // Since AVX is a superset of SSE3, only check for SSE here.
    if (Subtarget->hasSSE1() && !Subtarget->hasSSE3())
      // Expand FP_TO_UINT into a select.
      // FIXME: We would like to use a Custom expander here eventually to do
      // the optimal thing for SSE vs. the default expansion in the legalizer.
      setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand);
    else
      // With SSE3 we can use fisttpll to convert to a signed i64; without
      // SSE, we're stuck with a fistpll.
      setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
  }

  if (isTargetFTOL()) {
    // Use the _ftol2 runtime function, which has a pseudo-instruction
    // to handle its weird calling convention.
    setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom);
  }

  // TODO: when we have SSE, these could be more efficient, by using movd/movq.
  if (!X86ScalarSSEf64) {
    setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
    setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
    if (Subtarget->is64Bit()) {
      setOperationAction(ISD::BITCAST , MVT::f64 , Expand);
      // Without SSE, i64->f64 goes through memory.
      setOperationAction(ISD::BITCAST , MVT::i64 , Expand);
    }
  }

  // Scalar integer divide and remainder are lowered to use operations that
  // produce two results, to match the available instructions. This exposes
  // the two-result form to trivial CSE, which is able to combine x/y and x%y
  // into a single instruction.
  //
  // Scalar integer multiply-high is also lowered to use two-result
  // operations, to match the available instructions. However, plain multiply
  // (low) operations are left as Legal, as there are single-result
  // instructions for this in x86. Using the two-result multiply instructions
  // when both high and low results are needed must be arranged by dagcombine.
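  // For example, an i32 udiv and urem of the same operands both select to a
  // single hardware DIV, which leaves the quotient in EAX and the remainder
  // in EDX.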
  for (unsigned i = 0; i != array_lengthof(IntVTs); ++i) {
    MVT VT = IntVTs[i];
    setOperationAction(ISD::MULHS, VT, Expand);
    setOperationAction(ISD::MULHU, VT, Expand);
    setOperationAction(ISD::SDIV, VT, Expand);
    setOperationAction(ISD::UDIV, VT, Expand);
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::UREM, VT, Expand);

    // Add/Sub overflow ops with MVT::Glues are lowered to EFLAGS dependences.
    setOperationAction(ISD::ADDC, VT, Custom);
    setOperationAction(ISD::ADDE, VT, Custom);
    setOperationAction(ISD::SUBC, VT, Custom);
    setOperationAction(ISD::SUBE, VT, Custom);
  }

  setOperationAction(ISD::BR_JT , MVT::Other, Expand);
  setOperationAction(ISD::BRCOND , MVT::Other, Custom);
  setOperationAction(ISD::BR_CC , MVT::f32, Expand);
  setOperationAction(ISD::BR_CC , MVT::f64, Expand);
  setOperationAction(ISD::BR_CC , MVT::f80, Expand);
  setOperationAction(ISD::BR_CC , MVT::i8, Expand);
  setOperationAction(ISD::BR_CC , MVT::i16, Expand);
  setOperationAction(ISD::BR_CC , MVT::i32, Expand);
  setOperationAction(ISD::BR_CC , MVT::i64, Expand);
  setOperationAction(ISD::SELECT_CC , MVT::f32, Expand);
  setOperationAction(ISD::SELECT_CC , MVT::f64, Expand);
  setOperationAction(ISD::SELECT_CC , MVT::f80, Expand);
  setOperationAction(ISD::SELECT_CC , MVT::i8, Expand);
  setOperationAction(ISD::SELECT_CC , MVT::i16, Expand);
  setOperationAction(ISD::SELECT_CC , MVT::i32, Expand);
  setOperationAction(ISD::SELECT_CC , MVT::i64, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
  setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand);
  setOperationAction(ISD::FREM , MVT::f32 , Expand);
  setOperationAction(ISD::FREM , MVT::f64 , Expand);
  setOperationAction(ISD::FREM , MVT::f80 , Expand);
  setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);

  // Promote the i8 variants and force them on up to i32 which has a shorter
  // encoding.
  setOperationAction(ISD::CTTZ , MVT::i8 , Promote);
  AddPromotedToType (ISD::CTTZ , MVT::i8 , MVT::i32);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i8 , Promote);
  AddPromotedToType (ISD::CTTZ_ZERO_UNDEF , MVT::i8 , MVT::i32);
  if (Subtarget->hasBMI()) {
    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16 , Expand);
    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Expand);
    if (Subtarget->is64Bit())
      setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
  } else {
    setOperationAction(ISD::CTTZ , MVT::i16 , Custom);
    setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
    if (Subtarget->is64Bit())
      setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
  }

  if (Subtarget->hasLZCNT()) {
    // When promoting the i8 variants, force them to i32 for a shorter
    // encoding.
    setOperationAction(ISD::CTLZ , MVT::i8 , Promote);
    AddPromotedToType (ISD::CTLZ , MVT::i8 , MVT::i32);
    setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , Promote);
    AddPromotedToType (ISD::CTLZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
    setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16 , Expand);
    setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32 , Expand);
    if (Subtarget->is64Bit())
      setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
  } else {
    setOperationAction(ISD::CTLZ , MVT::i8 , Custom);
    setOperationAction(ISD::CTLZ , MVT::i16 , Custom);
    setOperationAction(ISD::CTLZ , MVT::i32 , Custom);
    setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , Custom);
    setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16 , Custom);
    setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32 , Custom);
    if (Subtarget->is64Bit()) {
      setOperationAction(ISD::CTLZ , MVT::i64 , Custom);
      setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
    }
  }

  // Special handling for half-precision floating point conversions.
  // If we don't have F16C support, then lower half float conversions
  // into library calls.
  if (TM.Options.UseSoftFloat || !Subtarget->hasF16C()) {
    setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
    setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
  }

  // There's never any support for operations beyond MVT::f32.
  setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
  setOperationAction(ISD::FP16_TO_FP, MVT::f80, Expand);
  setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
  setOperationAction(ISD::FP_TO_FP16, MVT::f80, Expand);

  setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::f80, MVT::f16, Expand);
  setTruncStoreAction(MVT::f32, MVT::f16, Expand);
  setTruncStoreAction(MVT::f64, MVT::f16, Expand);
  setTruncStoreAction(MVT::f80, MVT::f16, Expand);

  if (Subtarget->hasPOPCNT()) {
    setOperationAction(ISD::CTPOP , MVT::i8 , Promote);
  } else {
    setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
    setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
    setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
    if (Subtarget->is64Bit())
      setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
  }

  setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);

  if (!Subtarget->hasMOVBE())
    setOperationAction(ISD::BSWAP , MVT::i16 , Expand);

  // These should be promoted to a larger select which is supported.
  setOperationAction(ISD::SELECT , MVT::i1 , Promote);
  // X86 wants to expand cmov itself.
  setOperationAction(ISD::SELECT , MVT::i8 , Custom);
  setOperationAction(ISD::SELECT , MVT::i16 , Custom);
  setOperationAction(ISD::SELECT , MVT::i32 , Custom);
  setOperationAction(ISD::SELECT , MVT::f32 , Custom);
  setOperationAction(ISD::SELECT , MVT::f64 , Custom);
  setOperationAction(ISD::SELECT , MVT::f80 , Custom);
  setOperationAction(ISD::SETCC , MVT::i8 , Custom);
  setOperationAction(ISD::SETCC , MVT::i16 , Custom);
  setOperationAction(ISD::SETCC , MVT::i32 , Custom);
  setOperationAction(ISD::SETCC , MVT::f32 , Custom);
  setOperationAction(ISD::SETCC , MVT::f64 , Custom);
  setOperationAction(ISD::SETCC , MVT::f80 , Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::SELECT , MVT::i64 , Custom);
    setOperationAction(ISD::SETCC , MVT::i64 , Custom);
  }
  setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
  // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
  // SjLj exception handling but a light-weight setjmp/longjmp replacement to
  // support continuation, user-level threading, etc. As a result, no other
  // SjLj exception interfaces are implemented and please don't build your own
  // exception handling based on them.
  // LLVM/Clang supports zero-cost DWARF exception handling.
  setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
  setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);

  // Darwin ABI issue.
  setOperationAction(ISD::ConstantPool , MVT::i32 , Custom);
  setOperationAction(ISD::JumpTable , MVT::i32 , Custom);
  setOperationAction(ISD::GlobalAddress , MVT::i32 , Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32 , Custom);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
  setOperationAction(ISD::ExternalSymbol , MVT::i32 , Custom);
  setOperationAction(ISD::BlockAddress , MVT::i32 , Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::ConstantPool , MVT::i64 , Custom);
    setOperationAction(ISD::JumpTable , MVT::i64 , Custom);
    setOperationAction(ISD::GlobalAddress , MVT::i64 , Custom);
    setOperationAction(ISD::ExternalSymbol, MVT::i64 , Custom);
    setOperationAction(ISD::BlockAddress , MVT::i64 , Custom);
  }
  // 64-bit add, sub, shl, sra, srl (iff 32-bit x86)
  setOperationAction(ISD::SHL_PARTS , MVT::i32 , Custom);
  setOperationAction(ISD::SRA_PARTS , MVT::i32 , Custom);
  setOperationAction(ISD::SRL_PARTS , MVT::i32 , Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::SHL_PARTS , MVT::i64 , Custom);
    setOperationAction(ISD::SRA_PARTS , MVT::i64 , Custom);
    setOperationAction(ISD::SRL_PARTS , MVT::i64 , Custom);
  }

  if (Subtarget->hasSSE1())
    setOperationAction(ISD::PREFETCH , MVT::Other, Legal);

  setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom);

  // Expand certain atomics
  for (unsigned i = 0; i != array_lengthof(IntVTs); ++i) {
    MVT VT = IntVTs[i];
    setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
    setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
  }

  if (Subtarget->hasCmpxchg16b()) {
    setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
  }

  // FIXME - use subtarget debug flags
  if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetELF() &&
      !Subtarget->isTargetCygMing() && !Subtarget->isTargetWin64()) {
    setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
  }

  if (Subtarget->is64Bit()) {
    setExceptionPointerRegister(X86::RAX);
    setExceptionSelectorRegister(X86::RDX);
  } else {
    setExceptionPointerRegister(X86::EAX);
    setExceptionSelectorRegister(X86::EDX);
  }
  setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
  setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);

  setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
  setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);

  setOperationAction(ISD::TRAP, MVT::Other, Legal);
  setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART , MVT::Other, Custom);
  setOperationAction(ISD::VAEND , MVT::Other, Expand);
  if (Subtarget->is64Bit() && !Subtarget->isTargetWin64()) {
    // TargetInfo::X86_64ABIBuiltinVaList
    setOperationAction(ISD::VAARG , MVT::Other, Custom);
    setOperationAction(ISD::VACOPY , MVT::Other, Custom);
  } else {
    // TargetInfo::CharPtrBuiltinVaList
    setOperationAction(ISD::VAARG , MVT::Other, Expand);
    setOperationAction(ISD::VACOPY , MVT::Other, Expand);
  }

  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);

  setOperationAction(ISD::DYNAMIC_STACKALLOC, getPointerTy(), Custom);

  if (!TM.Options.UseSoftFloat && X86ScalarSSEf64) {
    // f32 and f64 use SSE.
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, &X86::FR32RegClass);
    addRegisterClass(MVT::f64, &X86::FR64RegClass);

    // Use ANDPD to simulate FABS.
    setOperationAction(ISD::FABS , MVT::f64, Custom);
    setOperationAction(ISD::FABS , MVT::f32, Custom);

    // Use XORP to simulate FNEG.
    setOperationAction(ISD::FNEG , MVT::f64, Custom);
    setOperationAction(ISD::FNEG , MVT::f32, Custom);

    // Use ANDPD and ORPD to simulate FCOPYSIGN.
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);

    // Lower this to FGETSIGNx86 plus an AND.
    setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
    setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);

    // We don't support sin/cos/fmod
    setOperationAction(ISD::FSIN , MVT::f64, Expand);
    setOperationAction(ISD::FCOS , MVT::f64, Expand);
    setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
    setOperationAction(ISD::FSIN , MVT::f32, Expand);
    setOperationAction(ISD::FCOS , MVT::f32, Expand);
    setOperationAction(ISD::FSINCOS, MVT::f32, Expand);

    // Expand FP immediates into loads from the stack, except for the special
    // cases we handle.
    addLegalFPImmediate(APFloat(+0.0)); // xorpd
    addLegalFPImmediate(APFloat(+0.0f)); // xorps
  } else if (!TM.Options.UseSoftFloat && X86ScalarSSEf32) {
    // Use SSE for f32, x87 for f64.
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, &X86::FR32RegClass);
    addRegisterClass(MVT::f64, &X86::RFP64RegClass);

    // Use ANDPS to simulate FABS.
    setOperationAction(ISD::FABS , MVT::f32, Custom);

    // Use XORP to simulate FNEG.
    setOperationAction(ISD::FNEG , MVT::f32, Custom);

    setOperationAction(ISD::UNDEF, MVT::f64, Expand);

    // Use ANDPS and ORPS to simulate FCOPYSIGN.
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);

    // We don't support sin/cos/fmod
    setOperationAction(ISD::FSIN , MVT::f32, Expand);
    setOperationAction(ISD::FCOS , MVT::f32, Expand);
    setOperationAction(ISD::FSINCOS, MVT::f32, Expand);

    // Special cases we handle for FP constants.
    addLegalFPImmediate(APFloat(+0.0f)); // xorps
    addLegalFPImmediate(APFloat(+0.0)); // FLD0
    addLegalFPImmediate(APFloat(+1.0)); // FLD1
    addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
    addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS

    if (!TM.Options.UnsafeFPMath) {
      setOperationAction(ISD::FSIN , MVT::f64, Expand);
      setOperationAction(ISD::FCOS , MVT::f64, Expand);
      setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
    }
  } else if (!TM.Options.UseSoftFloat) {
    // f32 and f64 in x87.
    // Set up the FP register classes.
    addRegisterClass(MVT::f64, &X86::RFP64RegClass);
    addRegisterClass(MVT::f32, &X86::RFP32RegClass);

    setOperationAction(ISD::UNDEF, MVT::f64, Expand);
    setOperationAction(ISD::UNDEF, MVT::f32, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

    if (!TM.Options.UnsafeFPMath) {
      setOperationAction(ISD::FSIN , MVT::f64, Expand);
      setOperationAction(ISD::FSIN , MVT::f32, Expand);
      setOperationAction(ISD::FCOS , MVT::f64, Expand);
      setOperationAction(ISD::FCOS , MVT::f32, Expand);
      setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
      setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
    }
    addLegalFPImmediate(APFloat(+0.0)); // FLD0
    addLegalFPImmediate(APFloat(+1.0)); // FLD1
    addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
    addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
    addLegalFPImmediate(APFloat(+0.0f)); // FLD0
    addLegalFPImmediate(APFloat(+1.0f)); // FLD1
    addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
    addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
  }

  // We don't support FMA.
  setOperationAction(ISD::FMA, MVT::f64, Expand);
  setOperationAction(ISD::FMA, MVT::f32, Expand);

  // Long double always uses X87.
  if (!TM.Options.UseSoftFloat) {
    addRegisterClass(MVT::f80, &X86::RFP80RegClass);
    setOperationAction(ISD::UNDEF, MVT::f80, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
    {
      APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended);
      addLegalFPImmediate(TmpFlt); // FLD0
      TmpFlt.changeSign();
      addLegalFPImmediate(TmpFlt); // FLD0/FCHS

      bool ignored;
      APFloat TmpFlt2(+1.0);
      TmpFlt2.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven,
                      &ignored);
      addLegalFPImmediate(TmpFlt2); // FLD1
      TmpFlt2.changeSign();
      addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
    }

    if (!TM.Options.UnsafeFPMath) {
      setOperationAction(ISD::FSIN , MVT::f80, Expand);
      setOperationAction(ISD::FCOS , MVT::f80, Expand);
      setOperationAction(ISD::FSINCOS, MVT::f80, Expand);
    }

    setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
    setOperationAction(ISD::FCEIL, MVT::f80, Expand);
    setOperationAction(ISD::FTRUNC, MVT::f80, Expand);
    setOperationAction(ISD::FRINT, MVT::f80, Expand);
    setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
    setOperationAction(ISD::FMA, MVT::f80, Expand);
  }

  // Always use a library call for pow.
  setOperationAction(ISD::FPOW , MVT::f32 , Expand);
  setOperationAction(ISD::FPOW , MVT::f64 , Expand);
  setOperationAction(ISD::FPOW , MVT::f80 , Expand);

  setOperationAction(ISD::FLOG, MVT::f80, Expand);
  setOperationAction(ISD::FLOG2, MVT::f80, Expand);
  setOperationAction(ISD::FLOG10, MVT::f80, Expand);
  setOperationAction(ISD::FEXP, MVT::f80, Expand);
  setOperationAction(ISD::FEXP2, MVT::f80, Expand);
  setOperationAction(ISD::FMINNUM, MVT::f80, Expand);
  setOperationAction(ISD::FMAXNUM, MVT::f80, Expand);

  // First set operation action for all vector types to either promote
  // (for widening) or expand (for scalarization). Then we will selectively
  // turn on ones that can be effectively codegen'd.
  for (MVT VT : MVT::vector_valuetypes()) {
    setOperationAction(ISD::ADD , VT, Expand);
    setOperationAction(ISD::SUB , VT, Expand);
    setOperationAction(ISD::FADD, VT, Expand);
    setOperationAction(ISD::FNEG, VT, Expand);
    setOperationAction(ISD::FSUB, VT, Expand);
    setOperationAction(ISD::MUL , VT, Expand);
    setOperationAction(ISD::FMUL, VT, Expand);
    setOperationAction(ISD::SDIV, VT, Expand);
    setOperationAction(ISD::UDIV, VT, Expand);
    setOperationAction(ISD::FDIV, VT, Expand);
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::UREM, VT, Expand);
    setOperationAction(ISD::LOAD, VT, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
    setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Expand);
    setOperationAction(ISD::INSERT_SUBVECTOR, VT, Expand);
    setOperationAction(ISD::FABS, VT, Expand);
    setOperationAction(ISD::FSIN, VT, Expand);
    setOperationAction(ISD::FSINCOS, VT, Expand);
    setOperationAction(ISD::FCOS, VT, Expand);
    setOperationAction(ISD::FSINCOS, VT, Expand);
    setOperationAction(ISD::FREM, VT, Expand);
    setOperationAction(ISD::FMA, VT, Expand);
    setOperationAction(ISD::FPOWI, VT, Expand);
    setOperationAction(ISD::FSQRT, VT, Expand);
    setOperationAction(ISD::FCOPYSIGN, VT, Expand);
    setOperationAction(ISD::FFLOOR, VT, Expand);
    setOperationAction(ISD::FCEIL, VT, Expand);
    setOperationAction(ISD::FTRUNC, VT, Expand);
    setOperationAction(ISD::FRINT, VT, Expand);
    setOperationAction(ISD::FNEARBYINT, VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::MULHS, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
    setOperationAction(ISD::MULHU, VT, Expand);
    setOperationAction(ISD::SDIVREM, VT, Expand);
    setOperationAction(ISD::UDIVREM, VT, Expand);
    setOperationAction(ISD::FPOW, VT, Expand);
    setOperationAction(ISD::CTPOP, VT, Expand);
    setOperationAction(ISD::CTTZ, VT, Expand);
    setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
    setOperationAction(ISD::CTLZ, VT, Expand);
    setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
    setOperationAction(ISD::SHL, VT, Expand);
    setOperationAction(ISD::SRA, VT, Expand);
    setOperationAction(ISD::SRL, VT, Expand);
    setOperationAction(ISD::ROTL, VT, Expand);
    setOperationAction(ISD::ROTR, VT, Expand);
    setOperationAction(ISD::BSWAP, VT, Expand);
    setOperationAction(ISD::SETCC, VT, Expand);
    setOperationAction(ISD::FLOG, VT, Expand);
    setOperationAction(ISD::FLOG2, VT, Expand);
    setOperationAction(ISD::FLOG10, VT, Expand);
    setOperationAction(ISD::FEXP, VT, Expand);
    setOperationAction(ISD::FEXP2, VT, Expand);
    setOperationAction(ISD::FP_TO_UINT, VT, Expand);
    setOperationAction(ISD::FP_TO_SINT, VT, Expand);
    setOperationAction(ISD::UINT_TO_FP, VT, Expand);
    setOperationAction(ISD::SINT_TO_FP, VT, Expand);
    setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
    setOperationAction(ISD::TRUNCATE, VT, Expand);
    setOperationAction(ISD::SIGN_EXTEND, VT, Expand);
    setOperationAction(ISD::ZERO_EXTEND, VT, Expand);
    setOperationAction(ISD::ANY_EXTEND, VT, Expand);
    setOperationAction(ISD::VSELECT, VT, Expand);
    setOperationAction(ISD::SELECT_CC, VT, Expand);
    for (MVT InnerVT : MVT::vector_valuetypes()) {
      setTruncStoreAction(InnerVT, VT, Expand);

      setLoadExtAction(ISD::SEXTLOAD, InnerVT, VT, Expand);
      setLoadExtAction(ISD::ZEXTLOAD, InnerVT, VT, Expand);

      // N.b. ISD::EXTLOAD legality is basically ignored except for i1-like
      // types, we have to deal with them whether we ask for Expansion or not.
      // Setting Expand causes its own optimisation problems though, so leave
      // them legal.
      if (VT.getVectorElementType() == MVT::i1)
        setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
    }
  }

  // FIXME: In order to prevent SSE instructions being expanded to MMX ones
  // with -msoft-float, disable use of MMX as well.
  if (!TM.Options.UseSoftFloat && Subtarget->hasMMX()) {
    addRegisterClass(MVT::x86mmx, &X86::VR64RegClass);
    // No operations on x86mmx supported, everything uses intrinsics.
  }

  // MMX-sized vectors (other than x86mmx) are expected to be expanded
  // into smaller operations.
  setOperationAction(ISD::MULHS, MVT::v8i8, Expand);
  setOperationAction(ISD::MULHS, MVT::v4i16, Expand);
  setOperationAction(ISD::MULHS, MVT::v2i32, Expand);
  setOperationAction(ISD::MULHS, MVT::v1i64, Expand);
  setOperationAction(ISD::AND, MVT::v8i8, Expand);
  setOperationAction(ISD::AND, MVT::v4i16, Expand);
  setOperationAction(ISD::AND, MVT::v2i32, Expand);
  setOperationAction(ISD::AND, MVT::v1i64, Expand);
  setOperationAction(ISD::OR, MVT::v8i8, Expand);
  setOperationAction(ISD::OR, MVT::v4i16, Expand);
  setOperationAction(ISD::OR, MVT::v2i32, Expand);
  setOperationAction(ISD::OR, MVT::v1i64, Expand);
  setOperationAction(ISD::XOR, MVT::v8i8, Expand);
  setOperationAction(ISD::XOR, MVT::v4i16, Expand);
  setOperationAction(ISD::XOR, MVT::v2i32, Expand);
  setOperationAction(ISD::XOR, MVT::v1i64, Expand);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8, Expand);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Expand);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i32, Expand);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Expand);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v1i64, Expand);
  setOperationAction(ISD::SELECT, MVT::v8i8, Expand);
  setOperationAction(ISD::SELECT, MVT::v4i16, Expand);
  setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
  setOperationAction(ISD::SELECT, MVT::v1i64, Expand);
  setOperationAction(ISD::BITCAST, MVT::v8i8, Expand);
  setOperationAction(ISD::BITCAST, MVT::v4i16, Expand);
  setOperationAction(ISD::BITCAST, MVT::v2i32, Expand);
  setOperationAction(ISD::BITCAST, MVT::v1i64, Expand);

  if (!TM.Options.UseSoftFloat && Subtarget->hasSSE1()) {
    addRegisterClass(MVT::v4f32, &X86::VR128RegClass);

    setOperationAction(ISD::FADD, MVT::v4f32, Legal);
    setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
    setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
    setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
    setOperationAction(ISD::FABS, MVT::v4f32, Custom);
    setOperationAction(ISD::LOAD, MVT::v4f32, Legal);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
  }

  if (!TM.Options.UseSoftFloat && Subtarget->hasSSE2()) {
    addRegisterClass(MVT::v2f64, &X86::VR128RegClass);

    // FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM
    // registers cannot be used even for integer operations.
    addRegisterClass(MVT::v16i8, &X86::VR128RegClass);
    addRegisterClass(MVT::v8i16, &X86::VR128RegClass);
    addRegisterClass(MVT::v4i32, &X86::VR128RegClass);
    addRegisterClass(MVT::v2i64, &X86::VR128RegClass);

    setOperationAction(ISD::ADD, MVT::v16i8, Legal);
    setOperationAction(ISD::ADD, MVT::v8i16, Legal);
    setOperationAction(ISD::ADD, MVT::v4i32, Legal);
    setOperationAction(ISD::ADD, MVT::v2i64, Legal);
    setOperationAction(ISD::MUL, MVT::v4i32, Custom);
    setOperationAction(ISD::MUL, MVT::v2i64, Custom);
    setOperationAction(ISD::UMUL_LOHI, MVT::v4i32, Custom);
    setOperationAction(ISD::SMUL_LOHI, MVT::v4i32, Custom);
    setOperationAction(ISD::MULHU, MVT::v8i16, Legal);
    setOperationAction(ISD::MULHS, MVT::v8i16, Legal);
    setOperationAction(ISD::SUB, MVT::v16i8, Legal);
    setOperationAction(ISD::SUB, MVT::v8i16, Legal);
    setOperationAction(ISD::SUB, MVT::v4i32, Legal);
    setOperationAction(ISD::SUB, MVT::v2i64, Legal);
    setOperationAction(ISD::MUL, MVT::v8i16, Legal);
    setOperationAction(ISD::FADD, MVT::v2f64, Legal);
    setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
    setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
    setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
    setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
    setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
    setOperationAction(ISD::FABS, MVT::v2f64, Custom);

    setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
    setOperationAction(ISD::SETCC, MVT::v16i8, Custom);
    setOperationAction(ISD::SETCC, MVT::v8i16, Custom);
    setOperationAction(ISD::SETCC, MVT::v4i32, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

    // Only provide customized ctpop vector bit twiddling for vector types we
    // know to perform better than using the popcnt instructions on each vector
    // element. If popcnt isn't supported, always provide the custom version.
    if (!Subtarget->hasPOPCNT()) {
      setOperationAction(ISD::CTPOP, MVT::v4i32, Custom);
      setOperationAction(ISD::CTPOP, MVT::v2i64, Custom);
    }

    // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
    for (int i = MVT::v16i8; i != MVT::v2i64; ++i) {
      MVT VT = (MVT::SimpleValueType)i;
      // Do not attempt to custom lower non-power-of-2 vectors
      if (!isPowerOf2_32(VT.getVectorNumElements()))
        continue;
      // Do not attempt to custom lower non-128-bit vectors
      if (!VT.is128BitVector())
        continue;
      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    }

    // We support custom legalizing of sext and anyext loads for specific
    // memory vector types which we can load as a scalar (or sequence of
    // scalars) and extend in-register to a legal 128-bit vector type. For sext
    // loads these must work with a single scalar load.
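    // For example, a sextload of v4i8 used as v4i32 can be done as a single
    // 32-bit scalar load followed by an in-register sign extension.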
    for (MVT VT : MVT::integer_vector_valuetypes()) {
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i8, Custom);
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i16, Custom);
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v8i8, Custom);
      setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i8, Custom);
      setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i16, Custom);
      setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i32, Custom);
      setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i8, Custom);
      setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i16, Custom);
      setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8i8, Custom);
    }

    setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);

    if (Subtarget->is64Bit()) {
      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
    }

    // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
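    // Bitwise ops and whole-register loads don't care about the element type,
    // so promoting them all to v2i64 lets a single pattern (e.g. PAND/POR/PXOR)
    // cover every 128-bit integer vector type.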
    for (int i = MVT::v16i8; i != MVT::v2i64; ++i) {
      MVT VT = (MVT::SimpleValueType)i;

      // Do not attempt to promote non-128-bit vectors
      if (!VT.is128BitVector())
        continue;

      setOperationAction(ISD::AND, VT, Promote);
      AddPromotedToType (ISD::AND, VT, MVT::v2i64);
      setOperationAction(ISD::OR, VT, Promote);
      AddPromotedToType (ISD::OR, VT, MVT::v2i64);
      setOperationAction(ISD::XOR, VT, Promote);
      AddPromotedToType (ISD::XOR, VT, MVT::v2i64);
      setOperationAction(ISD::LOAD, VT, Promote);
      AddPromotedToType (ISD::LOAD, VT, MVT::v2i64);
      setOperationAction(ISD::SELECT, VT, Promote);
      AddPromotedToType (ISD::SELECT, VT, MVT::v2i64);
    }

    // Custom lower v2i64 and v2f64 selects.
    setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
    setOperationAction(ISD::LOAD, MVT::v2i64, Legal);
    setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
    setOperationAction(ISD::SELECT, MVT::v2i64, Custom);

    setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);

    setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
    // As there is no 64-bit GPR available, we need build a special custom
    // sequence to convert from v2i32 to v2f32.
    if (!Subtarget->is64Bit())
      setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);

    setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
    setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);

    for (MVT VT : MVT::fp_vector_valuetypes())
      setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2f32, Legal);

    setOperationAction(ISD::BITCAST, MVT::v2i32, Custom);
    setOperationAction(ISD::BITCAST, MVT::v4i16, Custom);
    setOperationAction(ISD::BITCAST, MVT::v8i8, Custom);
  }

  if (!TM.Options.UseSoftFloat && Subtarget->hasSSE41()) {
    setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
    setOperationAction(ISD::FRINT, MVT::f32, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
    setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
    setOperationAction(ISD::FCEIL, MVT::f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
    setOperationAction(ISD::FRINT, MVT::f64, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);

    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
    setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
    setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
    setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
    setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);

    // FIXME: Do we need to handle scalar-to-vector here?
    setOperationAction(ISD::MUL, MVT::v4i32, Legal);

    setOperationAction(ISD::VSELECT, MVT::v2f64, Custom);
    setOperationAction(ISD::VSELECT, MVT::v2i64, Custom);
    setOperationAction(ISD::VSELECT, MVT::v4i32, Custom);
    setOperationAction(ISD::VSELECT, MVT::v4f32, Custom);
    setOperationAction(ISD::VSELECT, MVT::v8i16, Custom);
    // There is no BLENDI for byte vectors. We don't need to custom lower
    // some vselects for now.
    setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);

    // SSE41 brings specific instructions for doing vector sign extend even in
    // cases where we don't have SRA.
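    // (For example, there is no 64-bit arithmetic shift right, so v2i64 sign
    // extension cannot simply be synthesized from SRA.)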
    for (MVT VT : MVT::integer_vector_valuetypes()) {
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Custom);
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Custom);
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Custom);
    }

    // SSE41 also has vector sign/zero extending loads, PMOV[SZ]X
    setLoadExtAction(ISD::SEXTLOAD, MVT::v8i16, MVT::v8i8, Legal);
    setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i8, Legal);
    setLoadExtAction(ISD::SEXTLOAD, MVT::v2i64, MVT::v2i8, Legal);
    setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i16, Legal);
    setLoadExtAction(ISD::SEXTLOAD, MVT::v2i64, MVT::v2i16, Legal);
    setLoadExtAction(ISD::SEXTLOAD, MVT::v2i64, MVT::v2i32, Legal);

    setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i16, MVT::v8i8, Legal);
    setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i8, Legal);
    setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i64, MVT::v2i8, Legal);
    setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i16, Legal);
    setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i64, MVT::v2i16, Legal);
    setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i64, MVT::v2i32, Legal);
    // i8 and i16 vectors are custom because the source register and source
    // memory operand types are not the same width. f32 vectors are
    // custom since the immediate controlling the insert encodes additional
    // information.
1158 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
1159 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
1160 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
1161 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
1163 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Custom);
1164 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Custom);
1165 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
1166 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
1168 // FIXME: these should be Legal, but that's only for the case where
1169 // the index is constant. For now custom expand to deal with that.
1170 if (Subtarget->is64Bit()) {
1171 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Custom);
1172 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
1173 }
1174 }
1176 if (Subtarget->hasSSE2()) {
1177 setOperationAction(ISD::SRL, MVT::v8i16, Custom);
1178 setOperationAction(ISD::SRL, MVT::v16i8, Custom);
1180 setOperationAction(ISD::SHL, MVT::v8i16, Custom);
1181 setOperationAction(ISD::SHL, MVT::v16i8, Custom);
1183 setOperationAction(ISD::SRA, MVT::v8i16, Custom);
1184 setOperationAction(ISD::SRA, MVT::v16i8, Custom);
1186 // In the customized shift lowering, the legal cases in AVX2 will be
1187 // recognized.
1188 setOperationAction(ISD::SRL, MVT::v2i64, Custom);
1189 setOperationAction(ISD::SRL, MVT::v4i32, Custom);
1191 setOperationAction(ISD::SHL, MVT::v2i64, Custom);
1192 setOperationAction(ISD::SHL, MVT::v4i32, Custom);
1194 setOperationAction(ISD::SRA, MVT::v4i32, Custom);
1195 }
1197 if (!TM.Options.UseSoftFloat && Subtarget->hasFp256()) {
1198 addRegisterClass(MVT::v32i8, &X86::VR256RegClass);
1199 addRegisterClass(MVT::v16i16, &X86::VR256RegClass);
1200 addRegisterClass(MVT::v8i32, &X86::VR256RegClass);
1201 addRegisterClass(MVT::v8f32, &X86::VR256RegClass);
1202 addRegisterClass(MVT::v4i64, &X86::VR256RegClass);
1203 addRegisterClass(MVT::v4f64, &X86::VR256RegClass);
1205 setOperationAction(ISD::LOAD, MVT::v8f32, Legal);
1206 setOperationAction(ISD::LOAD, MVT::v4f64, Legal);
1207 setOperationAction(ISD::LOAD, MVT::v4i64, Legal);
1209 setOperationAction(ISD::FADD, MVT::v8f32, Legal);
1210 setOperationAction(ISD::FSUB, MVT::v8f32, Legal);
1211 setOperationAction(ISD::FMUL, MVT::v8f32, Legal);
1212 setOperationAction(ISD::FDIV, MVT::v8f32, Legal);
1213 setOperationAction(ISD::FSQRT, MVT::v8f32, Legal);
1214 setOperationAction(ISD::FFLOOR, MVT::v8f32, Legal);
1215 setOperationAction(ISD::FCEIL, MVT::v8f32, Legal);
1216 setOperationAction(ISD::FTRUNC, MVT::v8f32, Legal);
1217 setOperationAction(ISD::FRINT, MVT::v8f32, Legal);
1218 setOperationAction(ISD::FNEARBYINT, MVT::v8f32, Legal);
1219 setOperationAction(ISD::FNEG, MVT::v8f32, Custom);
1220 setOperationAction(ISD::FABS, MVT::v8f32, Custom);
1222 setOperationAction(ISD::FADD, MVT::v4f64, Legal);
1223 setOperationAction(ISD::FSUB, MVT::v4f64, Legal);
1224 setOperationAction(ISD::FMUL, MVT::v4f64, Legal);
1225 setOperationAction(ISD::FDIV, MVT::v4f64, Legal);
1226 setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);
1227 setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal);
1228 setOperationAction(ISD::FCEIL, MVT::v4f64, Legal);
1229 setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal);
1230 setOperationAction(ISD::FRINT, MVT::v4f64, Legal);
1231 setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Legal);
1232 setOperationAction(ISD::FNEG, MVT::v4f64, Custom);
1233 setOperationAction(ISD::FABS, MVT::v4f64, Custom);
1235 // (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
1236 // even though v8i16 is a legal type.
1237 setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Promote);
1238 setOperationAction(ISD::FP_TO_UINT, MVT::v8i16, Promote);
1239 setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
1241 setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Promote);
1242 setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
1243 setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal);
1245 setOperationAction(ISD::UINT_TO_FP, MVT::v8i8, Custom);
1246 setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
1248 for (MVT VT : MVT::fp_vector_valuetypes())
1249 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4f32, Legal);
1251 setOperationAction(ISD::SRL, MVT::v16i16, Custom);
1252 setOperationAction(ISD::SRL, MVT::v32i8, Custom);
1254 setOperationAction(ISD::SHL, MVT::v16i16, Custom);
1255 setOperationAction(ISD::SHL, MVT::v32i8, Custom);
1257 setOperationAction(ISD::SRA, MVT::v16i16, Custom);
1258 setOperationAction(ISD::SRA, MVT::v32i8, Custom);
1260 setOperationAction(ISD::SETCC, MVT::v32i8, Custom);
1261 setOperationAction(ISD::SETCC, MVT::v16i16, Custom);
1262 setOperationAction(ISD::SETCC, MVT::v8i32, Custom);
1263 setOperationAction(ISD::SETCC, MVT::v4i64, Custom);
1265 setOperationAction(ISD::SELECT, MVT::v4f64, Custom);
1266 setOperationAction(ISD::SELECT, MVT::v4i64, Custom);
1267 setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
1269 setOperationAction(ISD::VSELECT, MVT::v4f64, Custom);
1270 setOperationAction(ISD::VSELECT, MVT::v4i64, Custom);
1271 setOperationAction(ISD::VSELECT, MVT::v8i32, Custom);
1272 setOperationAction(ISD::VSELECT, MVT::v8f32, Custom);
1274 setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
1275 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom);
1276 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom);
1277 setOperationAction(ISD::ZERO_EXTEND, MVT::v4i64, Custom);
1278 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom);
1279 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i16, Custom);
1280 setOperationAction(ISD::ANY_EXTEND, MVT::v4i64, Custom);
1281 setOperationAction(ISD::ANY_EXTEND, MVT::v8i32, Custom);
1282 setOperationAction(ISD::ANY_EXTEND, MVT::v16i16, Custom);
1283 setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
1284 setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
1285 setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
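// Both FMA3 (hasFMA) and FMA4 provide fused multiply-add for the scalar and
// 128/256-bit floating-point types handled below, so ISD::FMA can be selected
// directly to a single instruction when either feature is present.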
1287 if (Subtarget->hasFMA() || Subtarget->hasFMA4()) {
1288 setOperationAction(ISD::FMA, MVT::v8f32, Legal);
1289 setOperationAction(ISD::FMA, MVT::v4f64, Legal);
1290 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
1291 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
1292 setOperationAction(ISD::FMA, MVT::f32, Legal);
1293 setOperationAction(ISD::FMA, MVT::f64, Legal);
1294 }
1296 if (Subtarget->hasInt256()) {
1297 setOperationAction(ISD::ADD, MVT::v4i64, Legal);
1298 setOperationAction(ISD::ADD, MVT::v8i32, Legal);
1299 setOperationAction(ISD::ADD, MVT::v16i16, Legal);
1300 setOperationAction(ISD::ADD, MVT::v32i8, Legal);
1302 setOperationAction(ISD::SUB, MVT::v4i64, Legal);
1303 setOperationAction(ISD::SUB, MVT::v8i32, Legal);
1304 setOperationAction(ISD::SUB, MVT::v16i16, Legal);
1305 setOperationAction(ISD::SUB, MVT::v32i8, Legal);
1307 setOperationAction(ISD::MUL, MVT::v4i64, Custom);
1308 setOperationAction(ISD::MUL, MVT::v8i32, Legal);
1309 setOperationAction(ISD::MUL, MVT::v16i16, Legal);
1310 // Don't lower v32i8 because there is no 128-bit byte mul
1312 setOperationAction(ISD::UMUL_LOHI, MVT::v8i32, Custom);
1313 setOperationAction(ISD::SMUL_LOHI, MVT::v8i32, Custom);
1314 setOperationAction(ISD::MULHU, MVT::v16i16, Legal);
1315 setOperationAction(ISD::MULHS, MVT::v16i16, Legal);
1317 setOperationAction(ISD::VSELECT, MVT::v16i16, Custom);
1318 setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
1320 // The custom lowering for UINT_TO_FP for v8i32 becomes interesting
1321 // when we have a 256-bit wide blend with immediate.
1322 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom);
1324 // Only provide customized ctpop vector bit twiddling for vector types we
1325 // know to perform better than using the popcnt instructions on each
1326 // vector element. If popcnt isn't supported, always provide the custom
1327 // version.
1328 if (!Subtarget->hasPOPCNT())
1329 setOperationAction(ISD::CTPOP, MVT::v4i64, Custom);
1331 // Custom CTPOP always performs better on natively supported v8i32
1332 setOperationAction(ISD::CTPOP, MVT::v8i32, Custom);
1334 // AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X
1335 setLoadExtAction(ISD::SEXTLOAD, MVT::v16i16, MVT::v16i8, Legal);
1336 setLoadExtAction(ISD::SEXTLOAD, MVT::v8i32, MVT::v8i8, Legal);
1337 setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64, MVT::v4i8, Legal);
1338 setLoadExtAction(ISD::SEXTLOAD, MVT::v8i32, MVT::v8i16, Legal);
1339 setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64, MVT::v4i16, Legal);
1340 setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64, MVT::v4i32, Legal);
1342 setLoadExtAction(ISD::ZEXTLOAD, MVT::v16i16, MVT::v16i8, Legal);
1343 setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i32, MVT::v8i8, Legal);
1344 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i64, MVT::v4i8, Legal);
1345 setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i32, MVT::v8i16, Legal);
1346 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i64, MVT::v4i16, Legal);
1347 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i64, MVT::v4i32, Legal);
1348 } else {
1349 setOperationAction(ISD::ADD, MVT::v4i64, Custom);
1350 setOperationAction(ISD::ADD, MVT::v8i32, Custom);
1351 setOperationAction(ISD::ADD, MVT::v16i16, Custom);
1352 setOperationAction(ISD::ADD, MVT::v32i8, Custom);
1354 setOperationAction(ISD::SUB, MVT::v4i64, Custom);
1355 setOperationAction(ISD::SUB, MVT::v8i32, Custom);
1356 setOperationAction(ISD::SUB, MVT::v16i16, Custom);
1357 setOperationAction(ISD::SUB, MVT::v32i8, Custom);
1359 setOperationAction(ISD::MUL, MVT::v4i64, Custom);
1360 setOperationAction(ISD::MUL, MVT::v8i32, Custom);
1361 setOperationAction(ISD::MUL, MVT::v16i16, Custom);
1362 // Don't lower v32i8 because there is no 128-bit byte mul
1363 }
1365 // In the customized shift lowering, the legal cases in AVX2 will be
1366 // recognized.
1367 setOperationAction(ISD::SRL, MVT::v4i64, Custom);
1368 setOperationAction(ISD::SRL, MVT::v8i32, Custom);
1370 setOperationAction(ISD::SHL, MVT::v4i64, Custom);
1371 setOperationAction(ISD::SHL, MVT::v8i32, Custom);
1373 setOperationAction(ISD::SRA, MVT::v8i32, Custom);
1375 // Custom lower several nodes for 256-bit types.
1376 for (MVT VT : MVT::vector_valuetypes()) {
1377 if (VT.getScalarSizeInBits() >= 32) {
1378 setOperationAction(ISD::MLOAD, VT, Legal);
1379 setOperationAction(ISD::MSTORE, VT, Legal);
1380 }
1381 // Extract subvector is special because the value type
1382 // (result) is 128-bit but the source is 256-bit wide.
1383 if (VT.is128BitVector()) {
1384 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1385 }
1386 // Do not attempt to custom lower other non-256-bit vectors
1387 if (!VT.is256BitVector())
1388 continue;
1390 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1391 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1392 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1393 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1394 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1395 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1396 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1397 }
1399 // Promote v32i8, v16i16, v8i32 select, and, or, xor to v4i64.
1400 for (int i = MVT::v32i8; i != MVT::v4i64; ++i) {
1401 MVT VT = (MVT::SimpleValueType)i;
1403 // Do not attempt to promote non-256-bit vectors
1404 if (!VT.is256BitVector())
1405 continue;
1407 setOperationAction(ISD::AND, VT, Promote);
1408 AddPromotedToType (ISD::AND, VT, MVT::v4i64);
1409 setOperationAction(ISD::OR, VT, Promote);
1410 AddPromotedToType (ISD::OR, VT, MVT::v4i64);
1411 setOperationAction(ISD::XOR, VT, Promote);
1412 AddPromotedToType (ISD::XOR, VT, MVT::v4i64);
1413 setOperationAction(ISD::LOAD, VT, Promote);
1414 AddPromotedToType (ISD::LOAD, VT, MVT::v4i64);
1415 setOperationAction(ISD::SELECT, VT, Promote);
1416 AddPromotedToType (ISD::SELECT, VT, MVT::v4i64);
1417 }
1418 }
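// AVX-512 adds 512-bit ZMM vectors and dedicated opmask registers (k0-k7)
// holding one predicate bit per element; the vXi1 types below are mapped onto
// those mask register classes (VK1/VK8/VK16).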
1420 if (!TM.Options.UseSoftFloat && Subtarget->hasAVX512()) {
1421 addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
1422 addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
1423 addRegisterClass(MVT::v8i64, &X86::VR512RegClass);
1424 addRegisterClass(MVT::v8f64, &X86::VR512RegClass);
1426 addRegisterClass(MVT::i1, &X86::VK1RegClass);
1427 addRegisterClass(MVT::v8i1, &X86::VK8RegClass);
1428 addRegisterClass(MVT::v16i1, &X86::VK16RegClass);
1430 for (MVT VT : MVT::fp_vector_valuetypes())
1431 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8f32, Legal);
1433 setOperationAction(ISD::BR_CC, MVT::i1, Expand);
1434 setOperationAction(ISD::SETCC, MVT::i1, Custom);
1435 setOperationAction(ISD::XOR, MVT::i1, Legal);
1436 setOperationAction(ISD::OR, MVT::i1, Legal);
1437 setOperationAction(ISD::AND, MVT::i1, Legal);
1438 setOperationAction(ISD::LOAD, MVT::v16f32, Legal);
1439 setOperationAction(ISD::LOAD, MVT::v8f64, Legal);
1440 setOperationAction(ISD::LOAD, MVT::v8i64, Legal);
1441 setOperationAction(ISD::LOAD, MVT::v16i32, Legal);
1442 setOperationAction(ISD::LOAD, MVT::v16i1, Legal);
1444 setOperationAction(ISD::FADD, MVT::v16f32, Legal);
1445 setOperationAction(ISD::FSUB, MVT::v16f32, Legal);
1446 setOperationAction(ISD::FMUL, MVT::v16f32, Legal);
1447 setOperationAction(ISD::FDIV, MVT::v16f32, Legal);
1448 setOperationAction(ISD::FSQRT, MVT::v16f32, Legal);
1449 setOperationAction(ISD::FNEG, MVT::v16f32, Custom);
1451 setOperationAction(ISD::FADD, MVT::v8f64, Legal);
1452 setOperationAction(ISD::FSUB, MVT::v8f64, Legal);
1453 setOperationAction(ISD::FMUL, MVT::v8f64, Legal);
1454 setOperationAction(ISD::FDIV, MVT::v8f64, Legal);
1455 setOperationAction(ISD::FSQRT, MVT::v8f64, Legal);
1456 setOperationAction(ISD::FNEG, MVT::v8f64, Custom);
1457 setOperationAction(ISD::FMA, MVT::v8f64, Legal);
1458 setOperationAction(ISD::FMA, MVT::v16f32, Legal);
1460 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
1461 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
1462 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
1463 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
1464 if (Subtarget->is64Bit()) {
1465 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Legal);
1466 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Legal);
1467 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Legal);
1468 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Legal);
1469 }
1470 setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);
1471 setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
1472 setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
1473 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
1474 setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);
1475 setOperationAction(ISD::SINT_TO_FP, MVT::v8i1, Custom);
1476 setOperationAction(ISD::SINT_TO_FP, MVT::v16i1, Custom);
1477 setOperationAction(ISD::SINT_TO_FP, MVT::v16i8, Promote);
1478 setOperationAction(ISD::SINT_TO_FP, MVT::v16i16, Promote);
1479 setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal);
1480 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal);
1481 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
1482 setOperationAction(ISD::FP_ROUND, MVT::v8f32, Legal);
1483 setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Legal);
1485 setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
1486 setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
1487 setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);
1488 setOperationAction(ISD::TRUNCATE, MVT::v8i1, Custom);
1489 setOperationAction(ISD::TRUNCATE, MVT::v16i1, Custom);
1490 setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom);
1491 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
1492 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
1493 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
1494 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
1495 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i8, Custom);
1496 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i16, Custom);
1497 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom);
1499 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f64, Custom);
1500 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i64, Custom);
1501 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f32, Custom);
1502 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i32, Custom);
1503 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i1, Custom);
1504 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i1, Legal);
1506 setOperationAction(ISD::SETCC, MVT::v16i1, Custom);
1507 setOperationAction(ISD::SETCC, MVT::v8i1, Custom);
1509 setOperationAction(ISD::MUL, MVT::v8i64, Custom);
1511 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i1, Custom);
1512 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i1, Custom);
1513 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i1, Custom);
1514 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i1, Custom);
1515 setOperationAction(ISD::BUILD_VECTOR, MVT::v8i1, Custom);
1516 setOperationAction(ISD::BUILD_VECTOR, MVT::v16i1, Custom);
1517 setOperationAction(ISD::SELECT, MVT::v8f64, Custom);
1518 setOperationAction(ISD::SELECT, MVT::v8i64, Custom);
1519 setOperationAction(ISD::SELECT, MVT::v16f32, Custom);
1521 setOperationAction(ISD::ADD, MVT::v8i64, Legal);
1522 setOperationAction(ISD::ADD, MVT::v16i32, Legal);
1524 setOperationAction(ISD::SUB, MVT::v8i64, Legal);
1525 setOperationAction(ISD::SUB, MVT::v16i32, Legal);
1527 setOperationAction(ISD::MUL, MVT::v16i32, Legal);
1529 setOperationAction(ISD::SRL, MVT::v8i64, Custom);
1530 setOperationAction(ISD::SRL, MVT::v16i32, Custom);
1532 setOperationAction(ISD::SHL, MVT::v8i64, Custom);
1533 setOperationAction(ISD::SHL, MVT::v16i32, Custom);
1535 setOperationAction(ISD::SRA, MVT::v8i64, Custom);
1536 setOperationAction(ISD::SRA, MVT::v16i32, Custom);
1538 setOperationAction(ISD::AND, MVT::v8i64, Legal);
1539 setOperationAction(ISD::OR, MVT::v8i64, Legal);
1540 setOperationAction(ISD::XOR, MVT::v8i64, Legal);
1541 setOperationAction(ISD::AND, MVT::v16i32, Legal);
1542 setOperationAction(ISD::OR, MVT::v16i32, Legal);
1543 setOperationAction(ISD::XOR, MVT::v16i32, Legal);
1545 if (Subtarget->hasCDI()) {
1546 setOperationAction(ISD::CTLZ, MVT::v8i64, Legal);
1547 setOperationAction(ISD::CTLZ, MVT::v16i32, Legal);
1548 }
1550 // Custom lower several nodes.
1551 for (MVT VT : MVT::vector_valuetypes()) {
1552 unsigned EltSize = VT.getVectorElementType().getSizeInBits();
1553 // Extract subvector is special because the value type
1554 // (result) is 256/128-bit but the source is 512-bit wide.
1555 if (VT.is128BitVector() || VT.is256BitVector()) {
1556 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1557 }
1558 if (VT.getVectorElementType() == MVT::i1)
1559 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1561 // Do not attempt to custom lower other non-512-bit vectors
1562 if (!VT.is512BitVector())
1563 continue;
1565 if (EltSize >= 32) {
1566 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1567 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1568 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1569 setOperationAction(ISD::VSELECT, VT, Legal);
1570 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1571 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1572 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1573 setOperationAction(ISD::MLOAD, VT, Legal);
1574 setOperationAction(ISD::MSTORE, VT, Legal);
1575 }
1576 }
1577 for (int i = MVT::v32i8; i != MVT::v8i64; ++i) {
1578 MVT VT = (MVT::SimpleValueType)i;
1580 // Do not attempt to promote non-512-bit vectors.
1581 if (!VT.is512BitVector())
1582 continue;
1584 setOperationAction(ISD::SELECT, VT, Promote);
1585 AddPromotedToType (ISD::SELECT, VT, MVT::v8i64);
1586 }
1587 } // has AVX-512
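// AVX-512BW extends 512-bit integer support to byte and word elements
// (v64i8/v32i16) along with the corresponding v32i1/v64i1 mask types.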
1589 if (!TM.Options.UseSoftFloat && Subtarget->hasBWI()) {
1590 addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
1591 addRegisterClass(MVT::v64i8, &X86::VR512RegClass);
1593 addRegisterClass(MVT::v32i1, &X86::VK32RegClass);
1594 addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
1596 setOperationAction(ISD::LOAD, MVT::v32i16, Legal);
1597 setOperationAction(ISD::LOAD, MVT::v64i8, Legal);
1598 setOperationAction(ISD::SETCC, MVT::v32i1, Custom);
1599 setOperationAction(ISD::SETCC, MVT::v64i1, Custom);
1600 setOperationAction(ISD::ADD, MVT::v32i16, Legal);
1601 setOperationAction(ISD::ADD, MVT::v64i8, Legal);
1602 setOperationAction(ISD::SUB, MVT::v32i16, Legal);
1603 setOperationAction(ISD::SUB, MVT::v64i8, Legal);
1604 setOperationAction(ISD::MUL, MVT::v32i16, Legal);
1606 for (int i = MVT::v32i8; i != MVT::v8i64; ++i) {
1607 const MVT VT = (MVT::SimpleValueType)i;
1609 const unsigned EltSize = VT.getVectorElementType().getSizeInBits();
1611 // Do not attempt to promote non-512-bit vectors.
1612 if (!VT.is512BitVector())
1613 continue;
1615 if (EltSize < 32) {
1616 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1617 setOperationAction(ISD::VSELECT, VT, Legal);
1618 }
1619 }
1620 }
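// AVX-512VL provides EVEX-encoded 128-bit and 256-bit forms of the AVX-512
// instructions, including the narrow v2i1/v4i1 mask types used below.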
1622 if (!TM.Options.UseSoftFloat && Subtarget->hasVLX()) {
1623 addRegisterClass(MVT::v4i1, &X86::VK4RegClass);
1624 addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
1626 setOperationAction(ISD::SETCC, MVT::v4i1, Custom);
1627 setOperationAction(ISD::SETCC, MVT::v2i1, Custom);
1628 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i1, Legal);
1630 setOperationAction(ISD::AND, MVT::v8i32, Legal);
1631 setOperationAction(ISD::OR, MVT::v8i32, Legal);
1632 setOperationAction(ISD::XOR, MVT::v8i32, Legal);
1633 setOperationAction(ISD::AND, MVT::v4i32, Legal);
1634 setOperationAction(ISD::OR, MVT::v4i32, Legal);
1635 setOperationAction(ISD::XOR, MVT::v4i32, Legal);
1636 }
1638 // SIGN_EXTEND_INREGs are evaluated by the extend type. Handle the expansion
1639 // of this type with custom code.
1640 for (MVT VT : MVT::vector_valuetypes())
1641 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
1643 // We want to custom lower some of our intrinsics.
1644 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
1645 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
1646 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
1647 if (!Subtarget->is64Bit())
1648 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
1650 // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
1651 // handle type legalization for these operations here.
1652 //
1653 // FIXME: We really should do custom legalization for addition and
1654 // subtraction on x86-32 once PR3203 is fixed. We really can't do much better
1655 // than generic legalization for 64-bit multiplication-with-overflow, though.
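// (Rough sketch of the custom lowering: select the plain ALU operation in a
// form that also defines EFLAGS, then recover the overflow/carry bit with a
// flag-based SETCC instead of going through generic expansion.)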
1656 for (unsigned i = 0, e = 3+Subtarget->is64Bit(); i != e; ++i) {
1657 // Add/Sub/Mul with overflow operations are custom lowered.
1658 MVT VT = IntVTs[i];
1659 setOperationAction(ISD::SADDO, VT, Custom);
1660 setOperationAction(ISD::UADDO, VT, Custom);
1661 setOperationAction(ISD::SSUBO, VT, Custom);
1662 setOperationAction(ISD::USUBO, VT, Custom);
1663 setOperationAction(ISD::SMULO, VT, Custom);
1664 setOperationAction(ISD::UMULO, VT, Custom);
1665 }
1668 if (!Subtarget->is64Bit()) {
1669 // These libcalls are not available in 32-bit.
1670 setLibcallName(RTLIB::SHL_I128, nullptr);
1671 setLibcallName(RTLIB::SRL_I128, nullptr);
1672 setLibcallName(RTLIB::SRA_I128, nullptr);
1673 }
1675 // Combine sin / cos into one node or libcall if possible.
1676 if (Subtarget->hasSinCos()) {
1677 setLibcallName(RTLIB::SINCOS_F32, "sincosf");
1678 setLibcallName(RTLIB::SINCOS_F64, "sincos");
1679 if (Subtarget->isTargetDarwin()) {
1680 // For MacOSX, we don't want the normal expansion of a libcall to sincos.
1681 // We want to issue a libcall to __sincos_stret to avoid memory traffic.
1682 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
1683 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
1684 }
1685 }
1687 if (Subtarget->isTargetWin64()) {
1688 setOperationAction(ISD::SDIV, MVT::i128, Custom);
1689 setOperationAction(ISD::UDIV, MVT::i128, Custom);
1690 setOperationAction(ISD::SREM, MVT::i128, Custom);
1691 setOperationAction(ISD::UREM, MVT::i128, Custom);
1692 setOperationAction(ISD::SDIVREM, MVT::i128, Custom);
1693 setOperationAction(ISD::UDIVREM, MVT::i128, Custom);
1694 }
1696 // We have target-specific dag combine patterns for the following nodes:
1697 setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
1698 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
1699 setTargetDAGCombine(ISD::VSELECT);
1700 setTargetDAGCombine(ISD::SELECT);
1701 setTargetDAGCombine(ISD::SHL);
1702 setTargetDAGCombine(ISD::SRA);
1703 setTargetDAGCombine(ISD::SRL);
1704 setTargetDAGCombine(ISD::OR);
1705 setTargetDAGCombine(ISD::AND);
1706 setTargetDAGCombine(ISD::ADD);
1707 setTargetDAGCombine(ISD::FADD);
1708 setTargetDAGCombine(ISD::FSUB);
1709 setTargetDAGCombine(ISD::FMA);
1710 setTargetDAGCombine(ISD::SUB);
1711 setTargetDAGCombine(ISD::LOAD);
1712 setTargetDAGCombine(ISD::STORE);
1713 setTargetDAGCombine(ISD::ZERO_EXTEND);
1714 setTargetDAGCombine(ISD::ANY_EXTEND);
1715 setTargetDAGCombine(ISD::SIGN_EXTEND);
1716 setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
1717 setTargetDAGCombine(ISD::TRUNCATE);
1718 setTargetDAGCombine(ISD::SINT_TO_FP);
1719 setTargetDAGCombine(ISD::SETCC);
1720 setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
1721 setTargetDAGCombine(ISD::BUILD_VECTOR);
1722 if (Subtarget->is64Bit())
1723 setTargetDAGCombine(ISD::MUL);
1724 setTargetDAGCombine(ISD::XOR);
1726 computeRegisterProperties();
1728 // On Darwin, -Os means optimize for size without hurting performance,
1729 // so do not reduce the limit.
1730 MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
1731 MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 16 : 8;
1732 MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
1733 MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
1734 MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
1735 MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
1736 setPrefLoopAlignment(4); // 2^4 bytes.
1738 // Predictable cmovs don't hurt on Atom because it's in-order.
1739 PredictableSelectIsExpensive = !Subtarget->isAtom();
1740 EnableExtLdPromotion = true;
1741 setPrefFunctionAlignment(4); // 2^4 bytes.
1743 verifyIntrinsicTables();
1744 }
1746 // This has so far only been implemented for 64-bit MachO.
1747 bool X86TargetLowering::useLoadStackGuardNode() const {
1748 return Subtarget->isTargetMachO() && Subtarget->is64Bit();
1749 }
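/// With the experimental widening flag, illegal vector types (other than
/// single-element vectors and i1 vectors) are legalized by widening -- padding
/// with undef elements up to a legal vector type -- rather than by promoting
/// the element type.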
1751 TargetLoweringBase::LegalizeTypeAction
1752 X86TargetLowering::getPreferredVectorAction(EVT VT) const {
1753 if (ExperimentalVectorWideningLegalization &&
1754 VT.getVectorNumElements() != 1 &&
1755 VT.getVectorElementType().getSimpleVT() != MVT::i1)
1756 return TypeWidenVector;
1758 return TargetLoweringBase::getPreferredVectorAction(VT);
1759 }
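/// Return the type used for the result of a comparison: scalar compares
/// produce i8 (i1 with AVX-512), AVX-512-style vector compares produce a vXi1
/// mask, and everything else falls back to an integer vector of the same width
/// holding all-ones/all-zeros elements.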
1761 EVT X86TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
1762 if (!VT.isVector())
1763 return Subtarget->hasAVX512() ? MVT::i1: MVT::i8;
1765 const unsigned NumElts = VT.getVectorNumElements();
1766 const EVT EltVT = VT.getVectorElementType();
1767 if (VT.is512BitVector()) {
1768 if (Subtarget->hasAVX512())
1769 if (EltVT == MVT::i32 || EltVT == MVT::i64 ||
1770 EltVT == MVT::f32 || EltVT == MVT::f64)
1771 switch(NumElts) {
1772 case 8: return MVT::v8i1;
1773 case 16: return MVT::v16i1;
1774 }
1775 if (Subtarget->hasBWI())
1776 if (EltVT == MVT::i8 || EltVT == MVT::i16)
1777 switch(NumElts) {
1778 case 32: return MVT::v32i1;
1779 case 64: return MVT::v64i1;
1780 }
1781 }
1783 if (VT.is256BitVector() || VT.is128BitVector()) {
1784 if (Subtarget->hasVLX())
1785 if (EltVT == MVT::i32 || EltVT == MVT::i64 ||
1786 EltVT == MVT::f32 || EltVT == MVT::f64)
1787 switch(NumElts) {
1788 case 2: return MVT::v2i1;
1789 case 4: return MVT::v4i1;
1790 case 8: return MVT::v8i1;
1791 }
1792 if (Subtarget->hasBWI() && Subtarget->hasVLX())
1793 if (EltVT == MVT::i8 || EltVT == MVT::i16)
1794 switch(NumElts) {
1795 case 8: return MVT::v8i1;
1796 case 16: return MVT::v16i1;
1797 case 32: return MVT::v32i1;
1798 }
1799 }
1801 return VT.changeVectorElementTypeToInteger();
1802 }
1804 /// Helper for getByValTypeAlignment to determine
1805 /// the desired ByVal argument alignment.
1806 static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) {
1807 if (MaxAlign == 16)
1808 return;
1809 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1810 if (VTy->getBitWidth() == 128)
1811 MaxAlign = 16;
1812 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1813 unsigned EltAlign = 0;
1814 getMaxByValAlign(ATy->getElementType(), EltAlign);
1815 if (EltAlign > MaxAlign)
1816 MaxAlign = EltAlign;
1817 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1818 for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
1819 unsigned EltAlign = 0;
1820 getMaxByValAlign(STy->getElementType(i), EltAlign);
1821 if (EltAlign > MaxAlign)
1822 MaxAlign = EltAlign;
1823 if (MaxAlign == 16)
1824 break;
1825 }
1826 }
1827 }
1829 /// Return the desired alignment for ByVal aggregate
1830 /// function arguments in the caller parameter area. For X86, aggregates
1831 /// that contain SSE vectors are placed at 16-byte boundaries while the rest
1832 /// are at 4-byte boundaries.
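/// For example, on 32-bit x86 with SSE enabled, a byval struct containing a
/// <4 x float> member is aligned to 16 bytes, while a struct of plain i32
/// fields keeps the default 4-byte boundary.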
1833 unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty) const {
1834 if (Subtarget->is64Bit()) {
1835 // Max of 8 and alignment of type.
1836 unsigned TyAlign = TD->getABITypeAlignment(Ty);
1837 if (TyAlign > 8)
1838 return TyAlign;
1839 return 8;
1840 }
1842 unsigned Align = 4;
1843 if (Subtarget->hasSSE1())
1844 getMaxByValAlign(Ty, Align);
1845 return Align;
1846 }
1848 /// Returns the target-specific optimal type for load
1849 /// and store operations as a result of memset, memcpy, and memmove
1850 /// lowering. If DstAlign is zero, that means the destination can satisfy
1851 /// any alignment constraint. Similarly, if SrcAlign is zero there is no need
1852 /// to check it against an alignment requirement, probably because the source
1853 /// does not need to be loaded. If 'IsMemset' is true, this is expanding a
1854 /// memset. If 'ZeroMemset' is true, it is a memset of zero. 'MemcpyStrSrc'
1855 /// indicates whether the memcpy source is constant so it does not need to
1856 /// be loaded.
1857 /// It returns EVT::Other if the type should be determined using generic
1858 /// target-independent logic.
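/// For example, with fast unaligned access a 32-byte copy is widened to v8i32
/// stores on AVX2 (v8f32 on plain AVX), v4i32 on SSE2, and falls back to i64
/// chunks on a 64-bit target without usable SSE.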
1859 EVT
1860 X86TargetLowering::getOptimalMemOpType(uint64_t Size,
1861 unsigned DstAlign, unsigned SrcAlign,
1862 bool IsMemset, bool ZeroMemset,
1863 bool MemcpyStrSrc,
1864 MachineFunction &MF) const {
1865 const Function *F = MF.getFunction();
1866 if ((!IsMemset || ZeroMemset) &&
1867 !F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
1868 Attribute::NoImplicitFloat)) {
1869 if (Size >= 16 &&
1870 (Subtarget->isUnalignedMemAccessFast() ||
1871 ((DstAlign == 0 || DstAlign >= 16) &&
1872 (SrcAlign == 0 || SrcAlign >= 16)))) {
1873 if (Size >= 32) {
1874 if (Subtarget->hasInt256())
1875 return MVT::v8i32;
1876 if (Subtarget->hasFp256())
1877 return MVT::v8f32;
1878 }
1879 if (Subtarget->hasSSE2())
1880 return MVT::v4i32;
1881 if (Subtarget->hasSSE1())
1882 return MVT::v4f32;
1883 } else if (!MemcpyStrSrc && Size >= 8 &&
1884 !Subtarget->is64Bit() &&
1885 Subtarget->hasSSE2()) {
1886 // Do not use f64 to lower memcpy if source is string constant. It's
1887 // better to use i32 to avoid the loads.
1888 return MVT::f64;
1889 }
1890 }
1891 if (Subtarget->is64Bit() && Size >= 8)
1892 return MVT::i64;
1893 return MVT::i32;
1894 }
1896 bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
1897 if (VT == MVT::f32)
1898 return X86ScalarSSEf32;
1899 else if (VT == MVT::f64)
1900 return X86ScalarSSEf64;
1901 return true;
1902 }
1904 bool
1905 X86TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
1906 unsigned,
1907 unsigned,
1908 bool *Fast) const {
1909 if (Fast)
1910 *Fast = Subtarget->isUnalignedMemAccessFast();
1911 return true;
1912 }
1914 /// Return the entry encoding for a jump table in the
1915 /// current function. The returned value is a member of the
1916 /// MachineJumpTableInfo::JTEntryKind enum.
1917 unsigned X86TargetLowering::getJumpTableEncoding() const {
1918 // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
1919 // symbol.
1920 if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
1921 Subtarget->isPICStyleGOT())
1922 return MachineJumpTableInfo::EK_Custom32;
1924 // Otherwise, use the normal jump table encoding heuristics.
1925 return TargetLowering::getJumpTableEncoding();
1926 }
1928 const MCExpr *
1929 X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
1930 const MachineBasicBlock *MBB,
1931 unsigned uid, MCContext &Ctx) const {
1932 assert(MBB->getParent()->getTarget().getRelocationModel() == Reloc::PIC_ &&
1933 Subtarget->isPICStyleGOT());
1934 // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
1935 // entries.
1936 return MCSymbolRefExpr::Create(MBB->getSymbol(),
1937 MCSymbolRefExpr::VK_GOTOFF, Ctx);
1938 }
1940 /// Returns the relocation base for the given PIC jumptable.
1941 SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
1942 SelectionDAG &DAG) const {
1943 if (!Subtarget->is64Bit())
1944 // This doesn't have SDLoc associated with it, but is not really the
1945 // same as a Register.
1946 return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), getPointerTy());
1947 return Table;
1948 }
1950 /// This returns the relocation base for the given PIC jumptable,
1951 /// the same as getPICJumpTableRelocBase, but as an MCExpr.
1952 const MCExpr *X86TargetLowering::
1953 getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
1954 MCContext &Ctx) const {
1955 // X86-64 uses RIP relative addressing based on the jump table label.
1956 if (Subtarget->isPICStyleRIPRel())
1957 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
1959 // Otherwise, the reference is relative to the PIC base.
1960 return MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), Ctx);
1961 }
1963 // FIXME: Why is this routine here? Move to RegInfo!
1964 std::pair<const TargetRegisterClass*, uint8_t>
1965 X86TargetLowering::findRepresentativeClass(MVT VT) const {
1966 const TargetRegisterClass *RRC = nullptr;
1967 uint8_t Cost = 1;
1968 switch (VT.SimpleTy) {
1969 default:
1970 return TargetLowering::findRepresentativeClass(VT);
1971 case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
1972 RRC = Subtarget->is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
1973 break;
1974 case MVT::x86mmx:
1975 RRC = &X86::VR64RegClass;
1976 break;
1977 case MVT::f32: case MVT::f64:
1978 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
1979 case MVT::v4f32: case MVT::v2f64:
1980 case MVT::v32i8: case MVT::v8i32: case MVT::v4i64: case MVT::v8f32:
1981 case MVT::v4f64:
1982 RRC = &X86::VR128RegClass;
1983 break;
1984 }
1985 return std::make_pair(RRC, Cost);
1986 }
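/// If the target keeps the stack protector cookie at a fixed thread-local
/// slot, report its location as an address space (256 is %gs, 257 is %fs in
/// the X86 backend) plus offset; this is only implemented for Linux targets
/// here.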
1988 bool X86TargetLowering::getStackCookieLocation(unsigned &AddressSpace,
1989 unsigned &Offset) const {
1990 if (!Subtarget->isTargetLinux())
1991 return false;
1993 if (Subtarget->is64Bit()) {
1994 // %fs:0x28, unless we're using a Kernel code model, in which case it's %gs:0x28.
1995 Offset = 0x28;
1996 if (getTargetMachine().getCodeModel() == CodeModel::Kernel)
1997 AddressSpace = 256;
1998 else
1999 AddressSpace = 257;
2000 } else {
2001 // %gs:0x14 on i386
2002 Offset = 0x14;
2003 AddressSpace = 256;
2004 }
2005 return true;
2006 }
2008 bool X86TargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
2009 unsigned DestAS) const {
2010 assert(SrcAS != DestAS && "Expected different address spaces!");
2012 return SrcAS < 256 && DestAS < 256;
2013 }
2015 //===----------------------------------------------------------------------===//
2016 // Return Value Calling Convention Implementation
2017 //===----------------------------------------------------------------------===//
2019 #include "X86GenCallingConv.inc"
2021 bool
2022 X86TargetLowering::CanLowerReturn(CallingConv::ID CallConv,
2023 MachineFunction &MF, bool isVarArg,
2024 const SmallVectorImpl<ISD::OutputArg> &Outs,
2025 LLVMContext &Context) const {
2026 SmallVector<CCValAssign, 16> RVLocs;
2027 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
2028 return CCInfo.CheckReturn(Outs, RetCC_X86);
2029 }
2031 const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
2032 static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
2033 return ScratchRegs;
2034 }
2036 SDValue
2037 X86TargetLowering::LowerReturn(SDValue Chain,
2038 CallingConv::ID CallConv, bool isVarArg,
2039 const SmallVectorImpl<ISD::OutputArg> &Outs,
2040 const SmallVectorImpl<SDValue> &OutVals,
2041 SDLoc dl, SelectionDAG &DAG) const {
2042 MachineFunction &MF = DAG.getMachineFunction();
2043 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
2045 SmallVector<CCValAssign, 16> RVLocs;
2046 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
2047 CCInfo.AnalyzeReturn(Outs, RetCC_X86);
2049 SDValue Flag;
2050 SmallVector<SDValue, 6> RetOps;
2051 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
2052 // Operand #1 = Bytes To Pop
2053 RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(),
2054 MVT::i16));
2056 // Copy the result values into the output registers.
2057 for (unsigned i = 0; i != RVLocs.size(); ++i) {
2058 CCValAssign &VA = RVLocs[i];
2059 assert(VA.isRegLoc() && "Can only return in registers!");
2060 SDValue ValToCopy = OutVals[i];
2061 EVT ValVT = ValToCopy.getValueType();
2063 // Promote values to the appropriate types.
2064 if (VA.getLocInfo() == CCValAssign::SExt)
2065 ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
2066 else if (VA.getLocInfo() == CCValAssign::ZExt)
2067 ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
2068 else if (VA.getLocInfo() == CCValAssign::AExt)
2069 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
2070 else if (VA.getLocInfo() == CCValAssign::BCvt)
2071 ValToCopy = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), ValToCopy);
2073 assert(VA.getLocInfo() != CCValAssign::FPExt &&
2074 "Unexpected FP-extend for return value.");
2076 // If this is x86-64, and we disabled SSE, we can't return FP values,
2077 // or SSE or MMX vectors.
2078 if ((ValVT == MVT::f32 || ValVT == MVT::f64 ||
2079 VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) &&
2080 (Subtarget->is64Bit() && !Subtarget->hasSSE1())) {
2081 report_fatal_error("SSE register return with SSE disabled");
2082 }
2083 // Likewise we can't return F64 values with SSE1 only. gcc does so, but
2084 // llvm-gcc has never done it right and no one has noticed, so this
2085 // should be OK for now.
2086 if (ValVT == MVT::f64 &&
2087 (Subtarget->is64Bit() && !Subtarget->hasSSE2()))
2088 report_fatal_error("SSE2 register return with SSE2 disabled");
2090 // Returns in ST0/ST1 are handled specially: these are pushed as operands to
2091 // the RET instruction and handled by the FP Stackifier.
2092 if (VA.getLocReg() == X86::FP0 ||
2093 VA.getLocReg() == X86::FP1) {
2094 // If this is a copy from an xmm register to ST(0), use an FPExtend to
2095 // change the value to the FP stack register class.
2096 if (isScalarFPTypeInSSEReg(VA.getValVT()))
2097 ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
2098 RetOps.push_back(ValToCopy);
2099 // Don't emit a copytoreg.
2100 continue;
2101 }
2103 // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
2104 // which is returned in RAX / RDX.
2105 if (Subtarget->is64Bit()) {
2106 if (ValVT == MVT::x86mmx) {
2107 if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
2108 ValToCopy = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ValToCopy);
2109 ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
2110 ValToCopy);
2111 // If we don't have SSE2 available, convert to v4f32 so the generated
2112 // register is legal.
2113 if (!Subtarget->hasSSE2())
2114 ValToCopy = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, ValToCopy);
2115 }
2116 }
2117 }
2119 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ValToCopy, Flag);
2120 Flag = Chain.getValue(1);
2121 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2122 }
2124 // The x86-64 ABIs require that, when returning structs by value, we copy
2125 // the sret argument into %rax/%eax (depending on ABI) for the return.
2126 // Win32 requires us to put the sret argument into %eax as well.
2127 // We saved the argument into a virtual register in the entry block,
2128 // so now we copy the value out and into %rax/%eax.
2129 if (DAG.getMachineFunction().getFunction()->hasStructRetAttr() &&
2130 (Subtarget->is64Bit() || Subtarget->isTargetKnownWindowsMSVC())) {
2131 MachineFunction &MF = DAG.getMachineFunction();
2132 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
2133 unsigned Reg = FuncInfo->getSRetReturnReg();
2134 assert(Reg &&
2135 "SRetReturnReg should have been set in LowerFormalArguments().");
2136 SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
2138 unsigned RetValReg
2139 = (Subtarget->is64Bit() && !Subtarget->isTarget64BitILP32()) ?
2140 X86::RAX : X86::EAX;
2141 Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag);
2142 Flag = Chain.getValue(1);
2144 // RAX/EAX now acts like a return value.
2145 RetOps.push_back(DAG.getRegister(RetValReg, getPointerTy()));
2146 }
2148 RetOps[0] = Chain; // Update chain.
2150 // Add the flag if we have it.
2151 if (Flag.getNode())
2152 RetOps.push_back(Flag);
2154 return DAG.getNode(X86ISD::RET_FLAG, dl, MVT::Other, RetOps);
2155 }
2157 bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
2158 if (N->getNumValues() != 1)
2159 return false;
2160 if (!N->hasNUsesOfValue(1, 0))
2161 return false;
2163 SDValue TCChain = Chain;
2164 SDNode *Copy = *N->use_begin();
2165 if (Copy->getOpcode() == ISD::CopyToReg) {
2166 // If the copy has a glue operand, we conservatively assume it isn't safe to
2167 // perform a tail call.
2168 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2169 return false;
2170 TCChain = Copy->getOperand(0);
2171 } else if (Copy->getOpcode() != ISD::FP_EXTEND)
2172 return false;
2174 bool HasRet = false;
2175 for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
2176 UI != UE; ++UI) {
2177 if (UI->getOpcode() != X86ISD::RET_FLAG)
2178 return false;
2179 // If we are returning more than one value, we can definitely
2180 // not make a tail call; see PR19530.
2181 if (UI->getNumOperands() > 4)
2182 return false;
2183 if (UI->getNumOperands() == 4 &&
2184 UI->getOperand(UI->getNumOperands()-1).getValueType() != MVT::Glue)
2185 return false;
2186 HasRet = true;
2187 }
2189 if (!HasRet)
2190 return false;
2192 Chain = TCChain;
2193 return true;
2194 }
2196 EVT
2197 X86TargetLowering::getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
2198 ISD::NodeType ExtendKind) const {
2199 MVT ReturnMVT;
2200 // TODO: Is this also valid on 32-bit?
2201 if (Subtarget->is64Bit() && VT == MVT::i1 && ExtendKind == ISD::ZERO_EXTEND)
2202 ReturnMVT = MVT::i8;
2203 else
2204 ReturnMVT = MVT::i32;
2206 EVT MinVT = getRegisterType(Context, ReturnMVT);
2207 return VT.bitsLT(MinVT) ? MinVT : VT;
2208 }
2210 /// Lower the result values of a call into the
2211 /// appropriate copies out of the corresponding physical registers.
2212 ///
2213 SDValue
2214 X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
2215 CallingConv::ID CallConv, bool isVarArg,
2216 const SmallVectorImpl<ISD::InputArg> &Ins,
2217 SDLoc dl, SelectionDAG &DAG,
2218 SmallVectorImpl<SDValue> &InVals) const {
2220 // Assign locations to each value returned by this call.
2221 SmallVector<CCValAssign, 16> RVLocs;
2222 bool Is64Bit = Subtarget->is64Bit();
2223 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2224 *DAG.getContext());
2225 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
2227 // Copy all of the result registers out of their specified physreg.
2228 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
2229 CCValAssign &VA = RVLocs[i];
2230 EVT CopyVT = VA.getValVT();
2232 // If this is x86-64, and we disabled SSE, we can't return FP values
2233 if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
2234 ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
2235 report_fatal_error("SSE register return with SSE disabled");
2236 }
2238 // If we prefer to use the value in xmm registers, copy it out as f80 and
2239 // use a truncate to move it from fp stack reg to xmm reg.
2240 if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
2241 isScalarFPTypeInSSEReg(VA.getValVT()))
2242 CopyVT = MVT::f80;
2244 Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
2245 CopyVT, InFlag).getValue(1);
2246 SDValue Val = Chain.getValue(0);
2248 if (CopyVT != VA.getValVT())
2249 Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
2250 // This truncation won't change the value.
2251 DAG.getIntPtrConstant(1));
2253 InFlag = Chain.getValue(2);
2254 InVals.push_back(Val);
2255 }
2257 return Chain;
2258 }
2260 //===----------------------------------------------------------------------===//
2261 // C & StdCall & Fast Calling Convention implementation
2262 //===----------------------------------------------------------------------===//
2263 // The StdCall calling convention is the standard for many Windows API
2264 // routines. It differs from the C calling convention only slightly: the
2265 // callee cleans up the stack rather than the caller, and symbols are
2266 // decorated in a particular way. It doesn't support any vector arguments.
2267 // For info on the fast calling convention, see the Fast Calling Convention
2268 // (tail call) implementation in LowerX86_32FastCCCallTo.
2270 /// CallIsStructReturn - Determines whether a call uses struct return
2271 /// semantics.
2272 enum StructReturnType {
2273 NotStructReturn,
2274 RegStructReturn,
2275 StackStructReturn
2276 };
2277 static StructReturnType
2278 callIsStructReturn(const SmallVectorImpl<ISD::OutputArg> &Outs) {
2279 if (Outs.empty())
2280 return NotStructReturn;
2282 const ISD::ArgFlagsTy &Flags = Outs[0].Flags;
2283 if (!Flags.isSRet())
2284 return NotStructReturn;
2285 if (Flags.isInReg())
2286 return RegStructReturn;
2287 return StackStructReturn;
2288 }
2290 /// Determines whether a function uses struct return semantics.
2291 static StructReturnType
2292 argsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins) {
2293 if (Ins.empty())
2294 return NotStructReturn;
2296 const ISD::ArgFlagsTy &Flags = Ins[0].Flags;
2297 if (!Flags.isSRet())
2298 return NotStructReturn;
2299 if (Flags.isInReg())
2300 return RegStructReturn;
2301 return StackStructReturn;
2302 }
2304 /// Make a copy of an aggregate at the address specified by "Src" to the
2305 /// address "Dst", with size and alignment information specified by the
2306 /// parameter attribute. The copy will be passed as a byval function parameter.
2307 static SDValue
2308 CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
2309 ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
2310 SDLoc dl) {
2311 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
2313 return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
2314 /*isVolatile*/false, /*AlwaysInline=*/true,
2315 MachinePointerInfo(), MachinePointerInfo());
2316 }
2318 /// Return true if the calling convention is one that
2319 /// supports tail call optimization.
2320 static bool IsTailCallConvention(CallingConv::ID CC) {
2321 return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
2322 CC == CallingConv::HiPE);
2323 }
2325 /// \brief Return true if the calling convention is a C calling convention.
2326 static bool IsCCallConvention(CallingConv::ID CC) {
2327 return (CC == CallingConv::C || CC == CallingConv::X86_64_Win64 ||
2328 CC == CallingConv::X86_64_SysV);
2329 }
2331 bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
2332 if (!CI->isTailCall() || getTargetMachine().Options.DisableTailCalls)
2333 return false;
2335 CallSite CS(CI);
2336 CallingConv::ID CalleeCC = CS.getCallingConv();
2337 if (!IsTailCallConvention(CalleeCC) && !IsCCallConvention(CalleeCC))
2338 return false;
2340 return true;
2341 }
2343 /// Return true if the function is being made into
2344 /// a tailcall target by changing its ABI.
2345 static bool FuncIsMadeTailCallSafe(CallingConv::ID CC,
2346 bool GuaranteedTailCallOpt) {
2347 return GuaranteedTailCallOpt && IsTailCallConvention(CC);
2348 }
2350 SDValue
2351 X86TargetLowering::LowerMemArgument(SDValue Chain,
2352 CallingConv::ID CallConv,
2353 const SmallVectorImpl<ISD::InputArg> &Ins,
2354 SDLoc dl, SelectionDAG &DAG,
2355 const CCValAssign &VA,
2356 MachineFrameInfo *MFI,
2357 unsigned i) const {
2358 // Create the nodes corresponding to a load from this parameter slot.
2359 ISD::ArgFlagsTy Flags = Ins[i].Flags;
2360 bool AlwaysUseMutable = FuncIsMadeTailCallSafe(
2361 CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
2362 bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
2363 EVT ValVT;
2365 // If value is passed by pointer we have address passed instead of the value
2366 // itself.
2367 if (VA.getLocInfo() == CCValAssign::Indirect)
2368 ValVT = VA.getLocVT();
2369 else
2370 ValVT = VA.getValVT();
2372 // FIXME: For now, all byval parameter objects are marked mutable. This can be
2373 // changed with more analysis.
2374 // In case of tail call optimization, mark all arguments mutable, since they
2375 // could be overwritten by the lowering of arguments in case of a tail call.
2376 if (Flags.isByVal()) {
2377 unsigned Bytes = Flags.getByValSize();
2378 if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
2379 int FI = MFI->CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable);
2380 return DAG.getFrameIndex(FI, getPointerTy());
2381 } else {
2382 int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8,
2383 VA.getLocMemOffset(), isImmutable);
2384 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
2385 return DAG.getLoad(ValVT, dl, Chain, FIN,
2386 MachinePointerInfo::getFixedStack(FI),
2387 false, false, false, 0);
2388 }
2389 }
2391 // FIXME: Get this from tablegen.
2392 static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
2393 const X86Subtarget *Subtarget) {
2394 assert(Subtarget->is64Bit());
2396 if (Subtarget->isCallingConvWin64(CallConv)) {
2397 static const MCPhysReg GPR64ArgRegsWin64[] = {
2398 X86::RCX, X86::RDX, X86::R8, X86::R9
2399 };
2400 return makeArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));
2401 }
2403 static const MCPhysReg GPR64ArgRegs64Bit[] = {
2404 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
2405 };
2406 return makeArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));
2407 }
2409 // FIXME: Get this from tablegen.
2410 static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
2411 CallingConv::ID CallConv,
2412 const X86Subtarget *Subtarget) {
2413 assert(Subtarget->is64Bit());
2414 if (Subtarget->isCallingConvWin64(CallConv)) {
2415 // The XMM registers which might contain vararg parameters are shadowed
2416 // by their paired GPRs, so we only need to save the GPRs to their home
2417 // slots.
2418 // TODO: __vectorcall will change this.
2419 return None;
2420 }
2422 const Function *Fn = MF.getFunction();
2423 bool NoImplicitFloatOps = Fn->getAttributes().
2424 hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat);
2425 assert(!(MF.getTarget().Options.UseSoftFloat && NoImplicitFloatOps) &&
2426 "SSE register cannot be used when SSE is disabled!");
2427 if (MF.getTarget().Options.UseSoftFloat || NoImplicitFloatOps ||
2428 !Subtarget->hasSSE1())
2429 // Kernel mode asks for SSE to be disabled, so there are no XMM argument
2430 // registers.
2431 return None;
2433 static const MCPhysReg XMMArgRegs64Bit[] = {
2434 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
2435 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
2436 };
2437 return makeArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
2438 }
2440 SDValue
2441 X86TargetLowering::LowerFormalArguments(SDValue Chain,
2442 CallingConv::ID CallConv,
2443 bool isVarArg,
2444 const SmallVectorImpl<ISD::InputArg> &Ins,
2445 SDLoc dl,
2446 SelectionDAG &DAG,
2447 SmallVectorImpl<SDValue> &InVals)
2448 const {
2449 MachineFunction &MF = DAG.getMachineFunction();
2450 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
2452 const Function* Fn = MF.getFunction();
2453 if (Fn->hasExternalLinkage() &&
2454 Subtarget->isTargetCygMing() &&
2455 Fn->getName() == "main")
2456 FuncInfo->setForceFramePointer(true);
2458 MachineFrameInfo *MFI = MF.getFrameInfo();
2459 bool Is64Bit = Subtarget->is64Bit();
2460 bool IsWin64 = Subtarget->isCallingConvWin64(CallConv);
2462 assert(!(isVarArg && IsTailCallConvention(CallConv)) &&
2463 "Var args not supported with calling convention fastcc, ghc or hipe");
2465 // Assign locations to all of the incoming arguments.
2466 SmallVector<CCValAssign, 16> ArgLocs;
2467 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
2469 // Allocate shadow area for Win64
2470 if (IsWin64)
2471 CCInfo.AllocateStack(32, 8);
2473 CCInfo.AnalyzeFormalArguments(Ins, CC_X86);
2475 unsigned LastVal = ~0U;
2476 SDValue ArgValue;
2477 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2478 CCValAssign &VA = ArgLocs[i];
2479 // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
2480 // places.
2481 assert(VA.getValNo() != LastVal &&
2482 "Don't support value assigned to multiple locs yet");
2483 (void)LastVal;
2484 LastVal = VA.getValNo();
2486 if (VA.isRegLoc()) {
2487 EVT RegVT = VA.getLocVT();
2488 const TargetRegisterClass *RC;
2489 if (RegVT == MVT::i32)
2490 RC = &X86::GR32RegClass;
2491 else if (Is64Bit && RegVT == MVT::i64)
2492 RC = &X86::GR64RegClass;
2493 else if (RegVT == MVT::f32)
2494 RC = &X86::FR32RegClass;
2495 else if (RegVT == MVT::f64)
2496 RC = &X86::FR64RegClass;
2497 else if (RegVT.is512BitVector())
2498 RC = &X86::VR512RegClass;
2499 else if (RegVT.is256BitVector())
2500 RC = &X86::VR256RegClass;
2501 else if (RegVT.is128BitVector())
2502 RC = &X86::VR128RegClass;
2503 else if (RegVT == MVT::x86mmx)
2504 RC = &X86::VR64RegClass;
2505 else if (RegVT == MVT::i1)
2506 RC = &X86::VK1RegClass;
2507 else if (RegVT == MVT::v8i1)
2508 RC = &X86::VK8RegClass;
2509 else if (RegVT == MVT::v16i1)
2510 RC = &X86::VK16RegClass;
2511 else if (RegVT == MVT::v32i1)
2512 RC = &X86::VK32RegClass;
2513 else if (RegVT == MVT::v64i1)
2514 RC = &X86::VK64RegClass;
2515 else
2516 llvm_unreachable("Unknown argument type!");
2518 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
2519 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
2521 // If this is an 8 or 16-bit value, it is really passed promoted to 32
2522 // bits. Insert an assert[sz]ext to capture this, then truncate to the
2523 // right size.
2524 if (VA.getLocInfo() == CCValAssign::SExt)
2525 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
2526 DAG.getValueType(VA.getValVT()));
2527 else if (VA.getLocInfo() == CCValAssign::ZExt)
2528 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
2529 DAG.getValueType(VA.getValVT()));
2530 else if (VA.getLocInfo() == CCValAssign::BCvt)
2531 ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
2533 if (VA.isExtInLoc()) {
2534 // Handle MMX values passed in XMM regs.
2535 if (RegVT.isVector())
2536 ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
2537 else
2538 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
2539 }
2540 } else {
2541 assert(VA.isMemLoc());
2542 ArgValue = LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, i);
2543 }
2545 // If value is passed via pointer - do a load.
2546 if (VA.getLocInfo() == CCValAssign::Indirect)
2547 ArgValue = DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue,
2548 MachinePointerInfo(), false, false, false, 0);
2550 InVals.push_back(ArgValue);
2551 }
2553 if (Subtarget->is64Bit() || Subtarget->isTargetKnownWindowsMSVC()) {
2554 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2555 // The x86-64 ABIs require that, when returning structs by value, we copy
2556 // the sret argument into %rax/%eax (depending on ABI) for the return.
2557 // Win32 requires us to put the sret argument into %eax as well.
2558 // Save the argument into a virtual register so that we can access it
2559 // from the return points.
2560 if (Ins[i].Flags.isSRet()) {
2561 unsigned Reg = FuncInfo->getSRetReturnReg();
2562 if (!Reg) {
2563 MVT PtrTy = getPointerTy();
2564 Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
2565 FuncInfo->setSRetReturnReg(Reg);
2566 }
2567 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[i]);
2568 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
2569 break;
2570 }
2571 }
2572 }
2574 unsigned StackSize = CCInfo.getNextStackOffset();
2575 // Align stack specially for tail calls.
2576 if (FuncIsMadeTailCallSafe(CallConv,
2577 MF.getTarget().Options.GuaranteedTailCallOpt))
2578 StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
2580 // If the function takes a variable number of arguments, make a frame index
2581 // for the start of the first vararg value... for expansion of llvm.va_start.
2582 // We can skip this if there are no va_start calls.
2583 if (MFI->hasVAStart() &&
2584 (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
2585 CallConv != CallingConv::X86_ThisCall))) {
2586 FuncInfo->setVarArgsFrameIndex(
2587 MFI->CreateFixedObject(1, StackSize, true));
2588 }
2590 // Figure out if XMM registers are in use.
2591 assert(!(MF.getTarget().Options.UseSoftFloat &&
2592 Fn->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
2593 Attribute::NoImplicitFloat)) &&
2594 "SSE register cannot be used when SSE is disabled!");
2596 // 64-bit calling conventions support varargs and register parameters, so we
2597 // have to do extra work to spill them in the prologue.
2598 if (Is64Bit && isVarArg && MFI->hasVAStart()) {
2599 // Find the first unallocated argument registers.
2600 ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
2601 ArrayRef<MCPhysReg> ArgXMMs = get64BitArgumentXMMs(MF, CallConv, Subtarget);
2602 unsigned NumIntRegs =
2603 CCInfo.getFirstUnallocated(ArgGPRs.data(), ArgGPRs.size());
2604 unsigned NumXMMRegs =
2605 CCInfo.getFirstUnallocated(ArgXMMs.data(), ArgXMMs.size());
2606 assert(!(NumXMMRegs && !Subtarget->hasSSE1()) &&
2607 "SSE register cannot be used when SSE is disabled!");
2609 // Gather all the live in physical registers.
2610 SmallVector<SDValue, 6> LiveGPRs;
2611 SmallVector<SDValue, 8> LiveXMMRegs;
2612 SDValue ALVal;
2613 for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
2614 unsigned GPR = MF.addLiveIn(Reg, &X86::GR64RegClass);
2615 LiveGPRs.push_back(
2616 DAG.getCopyFromReg(Chain, dl, GPR, MVT::i64));
2617 }
2618 if (!ArgXMMs.empty()) {
2619 unsigned AL = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
2620 ALVal = DAG.getCopyFromReg(Chain, dl, AL, MVT::i8);
2621 for (MCPhysReg Reg : ArgXMMs.slice(NumXMMRegs)) {
2622 unsigned XMMReg = MF.addLiveIn(Reg, &X86::VR128RegClass);
2623 LiveXMMRegs.push_back(
2624 DAG.getCopyFromReg(Chain, dl, XMMReg, MVT::v4f32));
2625 }
2626 }
2628 if (IsWin64) {
2629 const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
2630 // Get to the caller-allocated home save location. Add 8 to account
2631 // for the return address.
2632 int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
2633 FuncInfo->setRegSaveFrameIndex(
2634 MFI->CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
2635 // Fixup to set vararg frame on shadow area (4 x i64).
2636 if (NumIntRegs < 4)
2637 FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
2638 } else {
2639 // For X86-64, if there are vararg parameters that are passed via
2640 // registers, then we must store them to their spots on the stack so
2641 // they may be loaded by dereferencing the result of va_next.
2642 FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
2643 FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
2644 FuncInfo->setRegSaveFrameIndex(MFI->CreateStackObject(
2645 ArgGPRs.size() * 8 + ArgXMMs.size() * 16, 16, false));
2646 }
2648 // Store the integer parameter registers.
2649 SmallVector<SDValue, 8> MemOps;
2650 SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
2651 getPointerTy());
2652 unsigned Offset = FuncInfo->getVarArgsGPOffset();
2653 for (SDValue Val : LiveGPRs) {
2654 SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN,
2655 DAG.getIntPtrConstant(Offset));
2656 SDValue Store =
2657 DAG.getStore(Val.getValue(1), dl, Val, FIN,
2658 MachinePointerInfo::getFixedStack(
2659 FuncInfo->getRegSaveFrameIndex(), Offset),
2660 false, false, 0);
2661 MemOps.push_back(Store);
2662 Offset += 8;
2663 }
2665 if (!ArgXMMs.empty() && NumXMMRegs != ArgXMMs.size()) {
2666 // Now store the XMM (fp + vector) parameter registers.
2667 SmallVector<SDValue, 12> SaveXMMOps;
2668 SaveXMMOps.push_back(Chain);
2669 SaveXMMOps.push_back(ALVal);
2670 SaveXMMOps.push_back(DAG.getIntPtrConstant(
2671 FuncInfo->getRegSaveFrameIndex()));
2672 SaveXMMOps.push_back(DAG.getIntPtrConstant(
2673 FuncInfo->getVarArgsFPOffset()));
2674 SaveXMMOps.insert(SaveXMMOps.end(), LiveXMMRegs.begin(),
2675 LiveXMMRegs.end());
2676 MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl,
2677 MVT::Other, SaveXMMOps));
2678 }
2680 if (!MemOps.empty())
2681 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
2682 }
2684 if (isVarArg && MFI->hasMustTailInVarArgFunc()) {
2685 // Find the largest legal vector type.
2686 MVT VecVT = MVT::Other;
2687 // FIXME: Only some x86_32 calling conventions support AVX512.
2688 if (Subtarget->hasAVX512() &&
2689 (Is64Bit || (CallConv == CallingConv::X86_VectorCall ||
2690 CallConv == CallingConv::Intel_OCL_BI)))
2691 VecVT = MVT::v16f32;
2692 else if (Subtarget->hasAVX())
2693 VecVT = MVT::v8f32;
2694 else if (Subtarget->hasSSE2())
2695 VecVT = MVT::v4f32;
2697 // We forward some GPRs and some vector types.
2698 SmallVector<MVT, 2> RegParmTypes;
2699 MVT IntVT = Is64Bit ? MVT::i64 : MVT::i32;
2700 RegParmTypes.push_back(IntVT);
2701 if (VecVT != MVT::Other)
2702 RegParmTypes.push_back(VecVT);
2704 // Compute the set of forwarded registers. The rest are scratch.
2705 SmallVectorImpl<ForwardedRegister> &Forwards =
2706 FuncInfo->getForwardedMustTailRegParms();
2707 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
2709 // Conservatively forward AL on x86_64, since it might be used for varargs.
2710 if (Is64Bit && !CCInfo.isAllocated(X86::AL)) {
2711 unsigned ALVReg = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
2712 Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
2713 }
2715 // Copy all forwards from physical to virtual registers.
2716 for (ForwardedRegister &F : Forwards) {
2717 // FIXME: Can we use a less constrained schedule?
2718 SDValue RegVal = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
2719 F.VReg = MF.getRegInfo().createVirtualRegister(getRegClassFor(F.VT));
2720 Chain = DAG.getCopyToReg(Chain, dl, F.VReg, RegVal);
2721 }
2722 }
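// A minimal sketch of the kind of IR this block exists for (hypothetical
// function names; the register forwarding above is what makes the variadic
// musttail call legal to lower even though the callee's varargs are unknown
// at this point):
//
//   define void @thunk(i8* %this, ...) {
//     musttail call void (i8*, ...)* @impl(i8* %this, ...)
//     ret void
//   }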
2724 // Some CCs need callee pop.
2725 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
2726 MF.getTarget().Options.GuaranteedTailCallOpt)) {
2727 FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
2728 } else {
2729 FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
2730 // If this is an sret function, the return should pop the hidden pointer.
2731 if (!Is64Bit && !IsTailCallConvention(CallConv) &&
2732 !Subtarget->getTargetTriple().isOSMSVCRT() &&
2733 argsAreStructReturn(Ins) == StackStructReturn)
2734 FuncInfo->setBytesToPopOnReturn(4);
2735 }
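// For example (illustrative, 32-bit): a stdcall callee taking two i32
// arguments returns with "ret $8", while an sret function on a non-MSVCRT
// target returns with "ret $4" to pop the hidden struct-return pointer,
// which is what the 4 bytes set above correspond to.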
2737 if (!Is64Bit) {
2738 // RegSaveFrameIndex is X86-64 only.
2739 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
2740 if (CallConv == CallingConv::X86_FastCall ||
2741 CallConv == CallingConv::X86_ThisCall)
2742 // fastcall and thiscall functions can't have varargs.
2743 FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
2744 }
2746 FuncInfo->setArgumentStackSize(StackSize);
2748 return Chain;
2749 }
2751 SDValue
2752 X86TargetLowering::LowerMemOpCallTo(SDValue Chain,
2753 SDValue StackPtr, SDValue Arg,
2754 SDLoc dl, SelectionDAG &DAG,
2755 const CCValAssign &VA,
2756 ISD::ArgFlagsTy Flags) const {
2757 unsigned LocMemOffset = VA.getLocMemOffset();
2758 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
2759 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
2760 if (Flags.isByVal())
2761 return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
2763 return DAG.getStore(Chain, dl, Arg, PtrOff,
2764 MachinePointerInfo::getStack(LocMemOffset),
2765 false, false, 0);
2766 }
2768 /// Emit a load of the return address if tail call
2769 /// optimization is performed and it is required.
2770 SDValue
2771 X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG,
2772 SDValue &OutRetAddr, SDValue Chain,
2773 bool IsTailCall, bool Is64Bit,
2774 int FPDiff, SDLoc dl) const {
2775 // Adjust the Return address stack slot.
2776 EVT VT = getPointerTy();
2777 OutRetAddr = getReturnAddressFrameIndex(DAG);
2779 // Load the "old" Return address.
2780 OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo(),
2781 false, false, false, 0);
2782 return SDValue(OutRetAddr.getNode(), 1);
2783 }
2785 /// Emit a store of the return address if tail call
2786 /// optimization is performed and it is required (FPDiff!=0).
2787 static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
2788 SDValue Chain, SDValue RetAddrFrIdx,
2789 EVT PtrVT, unsigned SlotSize,
2790 int FPDiff, SDLoc dl) {
2791 // Store the return address to the appropriate stack slot.
2792 if (!FPDiff) return Chain;
2793 // Calculate the new stack slot for the return address.
2794 int NewReturnAddrFI =
2795 MF.getFrameInfo()->CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
2796 false);
2797 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
2798 Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
2799 MachinePointerInfo::getFixedStack(NewReturnAddrFI),
2800 false, false, 0);
2801 return Chain;
2802 }
2804 SDValue
2805 X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
2806 SmallVectorImpl<SDValue> &InVals) const {
2807 SelectionDAG &DAG = CLI.DAG;
2808 SDLoc &dl = CLI.DL;
2809 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
2810 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2811 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
2812 SDValue Chain = CLI.Chain;
2813 SDValue Callee = CLI.Callee;
2814 CallingConv::ID CallConv = CLI.CallConv;
2815 bool &isTailCall = CLI.IsTailCall;
2816 bool isVarArg = CLI.IsVarArg;
2818 MachineFunction &MF = DAG.getMachineFunction();
2819 bool Is64Bit = Subtarget->is64Bit();
2820 bool IsWin64 = Subtarget->isCallingConvWin64(CallConv);
2821 StructReturnType SR = callIsStructReturn(Outs);
2822 bool IsSibcall = false;
2823 X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
2825 if (MF.getTarget().Options.DisableTailCalls)
2826 isTailCall = false;
2828 bool IsMustTail = CLI.CS && CLI.CS->isMustTailCall();
2829 if (IsMustTail) {
2830 // Force this to be a tail call. The verifier rules are enough to ensure
2831 // that we can lower this successfully without moving the return address
2832 // around.
2833 isTailCall = true;
2834 } else if (isTailCall) {
2835 // Check if it's really possible to do a tail call.
2836 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
2837 isVarArg, SR != NotStructReturn,
2838 MF.getFunction()->hasStructRetAttr(), CLI.RetTy,
2839 Outs, OutVals, Ins, DAG);
2841 // Sibcalls are automatically detected tailcalls which do not require
2842 // ABI changes.
2843 if (!MF.getTarget().Options.GuaranteedTailCallOpt && isTailCall)
2844 IsSibcall = true;
2846 if (isTailCall)
2847 ++NumTailCalls;
2848 }
2850 assert(!(isVarArg && IsTailCallConvention(CallConv)) &&
2851 "Var args not supported with calling convention fastcc, ghc or hipe");
2853 // Analyze operands of the call, assigning locations to each operand.
2854 SmallVector<CCValAssign, 16> ArgLocs;
2855 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
2857 // Allocate shadow area for Win64
2858 if (IsWin64)
2859 CCInfo.AllocateStack(32, 8);
2861 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
2863 // Get a count of how many bytes are to be pushed on the stack.
2864 unsigned NumBytes = CCInfo.getNextStackOffset();
2865 if (IsSibcall)
2866 // This is a sibcall. The memory operands are already available in the
2867 // caller's incoming argument space, so no new stack space is needed.
2868 NumBytes = 0;
2869 else if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2870 IsTailCallConvention(CallConv))
2871 NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
2873 int FPDiff = 0;
2874 if (isTailCall && !IsSibcall && !IsMustTail) {
2875 // Lower arguments at fp - stackoffset + fpdiff.
2876 unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
2878 FPDiff = NumBytesCallerPushed - NumBytes;
2880 // Set the delta of movement of the return address stack slot.
2881 // But only set if delta is greater than previous delta.
2882 if (FPDiff < X86Info->getTCReturnAddrDelta())
2883 X86Info->setTCReturnAddrDelta(FPDiff);
2884 }
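// Worked example with assumed sizes: if the caller was lowered to pop 16
// bytes of its own incoming arguments on return but this tail call needs 32
// bytes of outgoing arguments, then FPDiff = 16 - 32 = -16, i.e. the return
// address has to be moved 16 bytes toward lower addresses before the tail
// jump (EmitTailCallStoreRetAddr below places it at FPDiff - SlotSize).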
2886 unsigned NumBytesToPush = NumBytes;
2887 unsigned NumBytesToPop = NumBytes;
2889 // If we have an inalloca argument, all stack space has already been allocated
2890 // for us and will be right at the top of the stack. We don't support multiple
2891 // arguments passed in memory when using inalloca.
2892 if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
2893 NumBytesToPush = 0;
2894 if (!ArgLocs.back().isMemLoc())
2895 report_fatal_error("cannot use inalloca attribute on a register "
2896 "parameter");
2897 if (ArgLocs.back().getLocMemOffset() != 0)
2898 report_fatal_error("any parameter with the inalloca attribute must be "
2899 "the only memory argument");
2900 }
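// A minimal sketch of the IR shape this expects (hypothetical %struct.S):
// all memory arguments are packed into a single inalloca allocation that is
// already at the top of the stack when the call is reached:
//
//   %argmem = alloca inalloca <{ %struct.S }>
//   ; ...initialize %argmem...
//   call void @f(<{ %struct.S }>* inalloca %argmem)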
2902 if (!IsSibcall)
2903 Chain = DAG.getCALLSEQ_START(
2904 Chain, DAG.getIntPtrConstant(NumBytesToPush, true), dl);
2906 SDValue RetAddrFrIdx;
2907 // Load return address for tail calls.
2908 if (isTailCall && FPDiff)
2909 Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
2910 Is64Bit, FPDiff, dl);
2912 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
2913 SmallVector<SDValue, 8> MemOpChains;
2914 SDValue StackPtr;
2916 // Walk the register/memloc assignments, inserting copies/loads. In the case
2917 // of tail call optimization, arguments are handled later.
2918 const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
2919 DAG.getSubtarget().getRegisterInfo());
2920 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2921 // Skip inalloca arguments, they have already been written.
2922 ISD::ArgFlagsTy Flags = Outs[i].Flags;
2923 if (Flags.isInAlloca())
2924 continue;
2926 CCValAssign &VA = ArgLocs[i];
2927 EVT RegVT = VA.getLocVT();
2928 SDValue Arg = OutVals[i];
2929 bool isByVal = Flags.isByVal();
2931 // Promote the value if needed.
2932 switch (VA.getLocInfo()) {
2933 default: llvm_unreachable("Unknown loc info!");
2934 case CCValAssign::Full: break;
2935 case CCValAssign::SExt:
2936 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
2937 break;
2938 case CCValAssign::ZExt:
2939 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
2940 break;
2941 case CCValAssign::AExt:
2942 if (RegVT.is128BitVector()) {
2943 // Special case: passing MMX values in XMM registers.
2944 Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
2945 Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
2946 Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
2947 } else
2948 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
2949 break;
2950 case CCValAssign::BCvt:
2951 Arg = DAG.getNode(ISD::BITCAST, dl, RegVT, Arg);
2952 break;
2953 case CCValAssign::Indirect: {
2954 // Store the argument.
2955 SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
2956 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
2957 Chain = DAG.getStore(Chain, dl, Arg, SpillSlot,
2958 MachinePointerInfo::getFixedStack(FI),
2959 false, false, 0);
2960 Arg = SpillSlot;
2961 break;
2962 }
2963 }
2965 if (VA.isRegLoc()) {
2966 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
2967 if (isVarArg && IsWin64) {
2968 // The Win64 ABI requires an argument XMM register to be copied to the
2969 // corresponding shadow register if the callee is a varargs function.
2970 unsigned ShadowReg = 0;
2971 switch (VA.getLocReg()) {
2972 case X86::XMM0: ShadowReg = X86::RCX; break;
2973 case X86::XMM1: ShadowReg = X86::RDX; break;
2974 case X86::XMM2: ShadowReg = X86::R8; break;
2975 case X86::XMM3: ShadowReg = X86::R9; break;
2976 }
2977 if (ShadowReg)
2978 RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
2979 }
2980 } else if (!IsSibcall && (!isTailCall || isByVal)) {
2981 assert(VA.isMemLoc());
2982 if (!StackPtr.getNode())
2983 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
2984 getPointerTy());
2985 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
2986 dl, DAG, VA, Flags));
2987 }
2988 }
2990 if (!MemOpChains.empty())
2991 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
2993 if (Subtarget->isPICStyleGOT()) {
2994 // ELF / PIC requires GOT in the EBX register before function calls via PLT
2995 // GOT pointer.
2996 if (!isTailCall) {
2997 RegsToPass.push_back(std::make_pair(unsigned(X86::EBX),
2998 DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), getPointerTy())));
2999 } else {
3000 // If we are tail calling and generating PIC/GOT style code load the
3001 // address of the callee into ECX. The value in ecx is used as target of
3002 // the tail jump. This is done to circumvent the ebx/callee-saved problem
3003 // for tail calls on PIC/GOT architectures. Normally we would just put the
3004 // address of GOT into ebx and then call target@PLT. But for tail calls
3005 // ebx would be restored (since ebx is callee saved) before jumping to the
3006 // target@PLT.
3008 // Note: The actual moving to ECX is done further down.
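// Roughly speaking (illustrative AT&T syntax), instead of the usual
//   call   target@PLT          # with the GOT pointer live in %ebx
// a PIC/GOT tail call materializes the callee address and jumps through ECX:
//   movl   <callee address>, %ecx
//   ...epilogue restores callee-saved registers, including %ebx...
//   jmp    *%ecx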
3009 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
3010 if (G && !G->getGlobal()->hasHiddenVisibility() &&
3011 !G->getGlobal()->hasProtectedVisibility())
3012 Callee = LowerGlobalAddress(Callee, DAG);
3013 else if (isa<ExternalSymbolSDNode>(Callee))
3014 Callee = LowerExternalSymbol(Callee, DAG);
3015 }
3016 }
3018 if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail) {
3019 // From AMD64 ABI document:
3020 // For calls that may call functions that use varargs or stdargs
3021 // (prototype-less calls or calls to functions containing ellipsis (...) in
3022 // the declaration) %al is used as hidden argument to specify the number
3023 // of SSE registers used. The contents of %al do not need to match exactly
3024 // the number of registers, but must be an upper bound on the number of SSE
3025 // registers used and is in the range 0 - 8 inclusive.
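// For example (illustrative): a variadic call such as printf("%f\n", x) with
// one double passed in %xmm0 is typically preceded by
//   movb $1, %al
// (any value that is at least the number of vector registers actually used,
// up to 8, also satisfies the ABI); that is the constant copied into AL below.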
3027 // Count the number of XMM registers allocated.
3028 static const MCPhysReg XMMArgRegs[] = {
3029 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3030 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3031 };
3032 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
3033 assert((Subtarget->hasSSE1() || !NumXMMRegs)
3034 && "SSE registers cannot be used when SSE is disabled");
3036 RegsToPass.push_back(std::make_pair(unsigned(X86::AL),
3037 DAG.getConstant(NumXMMRegs, MVT::i8)));
3038 }
3040 if (isVarArg && IsMustTail) {
3041 const auto &Forwards = X86Info->getForwardedMustTailRegParms();
3042 for (const auto &F : Forwards) {
3043 SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
3044 RegsToPass.push_back(std::make_pair(unsigned(F.PReg), Val));
3045 }
3046 }
3048 // For tail calls lower the arguments to the 'real' stack slots. Sibcalls
3049 // don't need this because the eligibility check rejects calls that require
3050 // shuffling arguments passed in memory.
3051 if (!IsSibcall && isTailCall) {
3052 // Force all the incoming stack arguments to be loaded from the stack
3053 // before any new outgoing arguments are stored to the stack, because the
3054 // outgoing stack slots may alias the incoming argument stack slots, and
3055 // the alias isn't otherwise explicit. This is slightly more conservative
3056 // than necessary, because it means that each store effectively depends
3057 // on every argument instead of just those arguments it would clobber.
3058 SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
3060 SmallVector<SDValue, 8> MemOpChains2;
3061 SDValue FIN;
3062 int FI = 0;
3063 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3064 CCValAssign &VA = ArgLocs[i];
3065 if (VA.isRegLoc())
3066 continue;
3067 assert(VA.isMemLoc());
3068 SDValue Arg = OutVals[i];
3069 ISD::ArgFlagsTy Flags = Outs[i].Flags;
3070 // Skip inalloca arguments. They don't require any work.
3071 if (Flags.isInAlloca())
3072 continue;
3073 // Create frame index.
3074 int32_t Offset = VA.getLocMemOffset()+FPDiff;
3075 uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
3076 FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
3077 FIN = DAG.getFrameIndex(FI, getPointerTy());
3079 if (Flags.isByVal()) {
3080 // Copy relative to framepointer.
3081 SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset());
3082 if (!StackPtr.getNode())
3083 StackPtr = DAG.getCopyFromReg(Chain, dl,
3084 RegInfo->getStackRegister(),
3085 getPointerTy());
3086 Source = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, Source);
3088 MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
3089 ArgChain,
3090 Flags, DAG, dl));
3091 } else {
3092 // Store relative to framepointer.
3093 MemOpChains2.push_back(
3094 DAG.getStore(ArgChain, dl, Arg, FIN,
3095 MachinePointerInfo::getFixedStack(FI),
3096 false, false, 0));
3097 }
3098 }
3100 if (!MemOpChains2.empty())
3101 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
3103 // Store the return address to the appropriate stack slot.
3104 Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
3105 getPointerTy(), RegInfo->getSlotSize(),
3106 FPDiff, dl);
3107 }
3109 // Build a sequence of copy-to-reg nodes chained together with token chain
3110 // and flag operands which copy the outgoing args into registers.
3111 SDValue InFlag;
3112 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
3113 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
3114 RegsToPass[i].second, InFlag);
3115 InFlag = Chain.getValue(1);
3116 }
3118 if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
3119 assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
3120 // In the 64-bit large code model, we have to make all calls
3121 // through a register, since the call instruction's 32-bit
3122 // pc-relative offset may not be large enough to hold the whole
3123 // address.
3124 } else if (Callee->getOpcode() == ISD::GlobalAddress) {
3125 // If the callee is a GlobalAddress node (quite common, every direct call
3126 // is) turn it into a TargetGlobalAddress node so that legalize doesn't hack
3127 // it.
3128 GlobalAddressSDNode* G = cast<GlobalAddressSDNode>(Callee);
3130 // We should use an extra load for direct calls to dllimported functions in
3131 // non-JIT mode.
3132 const GlobalValue *GV = G->getGlobal();
3133 if (!GV->hasDLLImportStorageClass()) {
3134 unsigned char OpFlags = 0;
3135 bool ExtraLoad = false;
3136 unsigned WrapperKind = ISD::DELETED_NODE;
3138 // On ELF targets, in both X86-64 and X86-32 mode, direct calls to
3139 // external symbols must go through the PLT in PIC mode. If the symbol
3140 // has hidden or protected visibility, or if it is static or local, then
3141 // we don't need to use the PLT - we can directly call it.
3142 if (Subtarget->isTargetELF() &&
3143 DAG.getTarget().getRelocationModel() == Reloc::PIC_ &&
3144 GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
3145 OpFlags = X86II::MO_PLT;
3146 } else if (Subtarget->isPICStyleStubAny() &&
3147 (GV->isDeclaration() || GV->isWeakForLinker()) &&
3148 (!Subtarget->getTargetTriple().isMacOSX() ||
3149 Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
3150 // PC-relative references to external symbols should go through $stub,
3151 // unless we're building with the leopard linker or later, which
3152 // automatically synthesizes these stubs.
3153 OpFlags = X86II::MO_DARWIN_STUB;
3154 } else if (Subtarget->isPICStyleRIPRel() &&
3155 isa<Function>(GV) &&
3156 cast<Function>(GV)->getAttributes().
3157 hasAttribute(AttributeSet::FunctionIndex,
3158 Attribute::NonLazyBind)) {
3159 // If the function is marked as non-lazy, generate an indirect call
3160 // which loads from the GOT directly. This avoids runtime overhead
3161 // at the cost of eager binding (and one extra byte of encoding).
3162 OpFlags = X86II::MO_GOTPCREL;
3163 WrapperKind = X86ISD::WrapperRIP;
3164 ExtraLoad = true;
3165 }
3167 Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(),
3168 G->getOffset(), OpFlags);
3170 // Add a wrapper if needed.
3171 if (WrapperKind != ISD::DELETED_NODE)
3172 Callee = DAG.getNode(X86ISD::WrapperRIP, dl, getPointerTy(), Callee);
3173 // Add extra indirection if needed.
3174 if (ExtraLoad)
3175 Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Callee,
3176 MachinePointerInfo::getGOT(),
3177 false, false, false, 0);
3178 }
3179 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
3180 unsigned char OpFlags = 0;
3182 // On ELF targets, in either X86-64 or X86-32 mode, direct calls to
3183 // external symbols should go through the PLT.
3184 if (Subtarget->isTargetELF() &&
3185 DAG.getTarget().getRelocationModel() == Reloc::PIC_) {
3186 OpFlags = X86II::MO_PLT;
3187 } else if (Subtarget->isPICStyleStubAny() &&
3188 (!Subtarget->getTargetTriple().isMacOSX() ||
3189 Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
3190 // PC-relative references to external symbols should go through $stub,
3191 // unless we're building with the leopard linker or later, which
3192 // automatically synthesizes these stubs.
3193 OpFlags = X86II::MO_DARWIN_STUB;
3194 }
3196 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(),
3197 OpFlags);
3198 } else if (Subtarget->isTarget64BitILP32() && Callee->getValueType(0) == MVT::i32) {
3199 // Zero-extend the 32-bit Callee address into a 64-bit one, per the x32 ABI.
3200 Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
3201 }
3203 // Returns a chain & a flag for retval copy to use.
3204 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
3205 SmallVector<SDValue, 8> Ops;
3207 if (!IsSibcall && isTailCall) {
3208 Chain = DAG.getCALLSEQ_END(Chain,
3209 DAG.getIntPtrConstant(NumBytesToPop, true),
3210 DAG.getIntPtrConstant(0, true), InFlag, dl);
3211 InFlag = Chain.getValue(1);
3212 }
3214 Ops.push_back(Chain);
3215 Ops.push_back(Callee);
3217 if (isTailCall)
3218 Ops.push_back(DAG.getConstant(FPDiff, MVT::i32));
3220 // Add argument registers to the end of the list so that they are known live
3221 // into the call.
3222 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
3223 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
3224 RegsToPass[i].second.getValueType()));
3226 // Add a register mask operand representing the call-preserved registers.
3227 const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo();
3228 const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
3229 assert(Mask && "Missing call preserved mask for calling convention");
3230 Ops.push_back(DAG.getRegisterMask(Mask));
3232 if (InFlag.getNode())
3233 Ops.push_back(InFlag);
3235 if (isTailCall) {
3236 // We used to do:
3237 //// If this is the first return lowered for this function, add the regs
3238 //// to the liveout set for the function.
3239 // This isn't right, although it's probably harmless on x86; liveouts
3240 // should be computed from returns not tail calls. Consider a void
3241 // function making a tail call to a function returning int.
3242 return DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops);
3243 }
3245 Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
3246 InFlag = Chain.getValue(1);
3248 // Create the CALLSEQ_END node.
3249 unsigned NumBytesForCalleeToPop;
3250 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
3251 DAG.getTarget().Options.GuaranteedTailCallOpt))
3252 NumBytesForCalleeToPop = NumBytes; // Callee pops everything
3253 else if (!Is64Bit && !IsTailCallConvention(CallConv) &&
3254 !Subtarget->getTargetTriple().isOSMSVCRT() &&
3255 SR == StackStructReturn)
3256 // If this is a call to a struct-return function, the callee
3257 // pops the hidden struct pointer, so we have to push it back.
3258 // This is common for Darwin/X86, Linux & Mingw32 targets.
3259 // For MSVC Win32 targets, the caller pops the hidden struct pointer.
3260 NumBytesForCalleeToPop = 4;
3261 else
3262 NumBytesForCalleeToPop = 0; // Callee pops nothing.
3264 // Returns a flag for retval copy to use.
3265 if (!IsSibcall) {
3266 Chain = DAG.getCALLSEQ_END(Chain,
3267 DAG.getIntPtrConstant(NumBytesToPop, true),
3268 DAG.getIntPtrConstant(NumBytesForCalleeToPop,
3269 true),
3270 InFlag, dl);
3271 InFlag = Chain.getValue(1);
3272 }
3274 // Handle result values, copying them out of physregs into vregs that we
3275 // return.
3276 return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
3277 Ins, dl, DAG, InVals);
3278 }
3280 //===----------------------------------------------------------------------===//
3281 // Fast Calling Convention (tail call) implementation
3282 //===----------------------------------------------------------------------===//
3284 // Like the stdcall convention, the callee cleans up the arguments, except
3285 // that ECX is reserved for storing the address of the tail-called function.
3286 // Only 2 registers are free for argument passing (inreg). Tail call
3287 // optimization is performed provided:
3288 // * tailcallopt is enabled
3289 // * caller/callee are fastcc
3290 // On X86_64 architecture with GOT-style position independent code only local
3291 // (within module) calls are supported at the moment.
3292 // To keep the stack aligned according to the platform ABI, the function
3293 // GetAlignedArgumentStackSize ensures that the argument delta is always a
3294 // multiple of the stack alignment. (Dynamic linkers need this - Darwin's dyld, for example.)
3295 // If a tail-called callee has more arguments than the caller, the caller
3296 // needs to make sure that there is room to move the RETADDR to. This is
3297 // achieved by reserving an area the size of the argument delta right after the
3298 // original RETADDR, but before the saved framepointer or the spilled registers
3299 // e.g. caller(arg1, arg2) calls callee(arg1, arg2, arg3, arg4)
3300 // stack layout:
3301 // arg1
3302 // arg2
3303 // RETADDR
3304 // [ new RETADDR
3305 // move area ]
3306 // (possible EBP)
3307 // ESI
3308 // EDI
3309 // local1 ..
3311 /// GetAlignedArgumentStackSize - Round the stack size up so that it is,
3312 /// e.g., 16n + 12 bytes for a 16-byte alignment requirement.
3313 unsigned
3314 X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
3315 SelectionDAG& DAG) const {
3316 MachineFunction &MF = DAG.getMachineFunction();
3317 const TargetMachine &TM = MF.getTarget();
3318 const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
3319 TM.getSubtargetImpl()->getRegisterInfo());
3320 const TargetFrameLowering &TFI = *TM.getSubtargetImpl()->getFrameLowering();
3321 unsigned StackAlignment = TFI.getStackAlignment();
3322 uint64_t AlignMask = StackAlignment - 1;
3323 int64_t Offset = StackSize;
3324 unsigned SlotSize = RegInfo->getSlotSize();
3325 if ( (Offset & AlignMask) <= (StackAlignment - SlotSize) ) {
3326 // The remainder is at most (StackAlignment - SlotSize), so just add the difference.
3327 Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask));
3328 } else {
3329 // Mask out the lower bits, add StackAlignment once plus (StackAlignment - SlotSize).
3330 Offset = ((~AlignMask) & Offset) + StackAlignment +
3331 (StackAlignment-SlotSize);
3332 }
3333 return Offset;
3334 }
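// Worked example of the rounding above (assuming a 16-byte stack alignment
// and a 4-byte slot, i.e. 32-bit, so AlignMask == 15 and the target shape is
// 16n + 12): StackSize == 20 gives (20 & 15) == 4 <= 12, so the first branch
// yields 20 + (12 - 4) == 28 == 16*1 + 12. StackSize == 30 gives
// (30 & 15) == 14 > 12, so the second branch yields
// (30 & ~15) + 16 + 12 == 16 + 28 == 44 == 16*2 + 12.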
3336 /// MatchingStackOffset - Return true if the given stack call argument is
3337 /// already available in the same position (relatively) of the caller's
3338 /// incoming argument stack.
3339 static
3340 bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
3341 MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
3342 const X86InstrInfo *TII) {
3343 unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;
3344 int FI = INT_MAX;
3345 if (Arg.getOpcode() == ISD::CopyFromReg) {
3346 unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
3347 if (!TargetRegisterInfo::isVirtualRegister(VR))
3348 return false;
3349 MachineInstr *Def = MRI->getVRegDef(VR);
3350 if (!Def)
3351 return false;
3352 if (!Flags.isByVal()) {
3353 if (!TII->isLoadFromStackSlot(Def, FI))
3354 return false;
3355 } else {
3356 unsigned Opcode = Def->getOpcode();
3357 if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r) &&
3358 Def->getOperand(1).isFI()) {
3359 FI = Def->getOperand(1).getIndex();
3360 Bytes = Flags.getByValSize();
3361 } else
3362 return false;
3363 }
3364 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
3365 if (Flags.isByVal())
3366 // ByVal argument is passed in as a pointer but it's now being
3367 // dereferenced. e.g.
3368 // define @foo(%struct.X* %A) {
3369 // tail call @bar(%struct.X* byval %A)
3370 // }
3371 return false;
3372 SDValue Ptr = Ld->getBasePtr();
3373 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
3374 if (!FINode)
3375 return false;
3376 FI = FINode->getIndex();
3377 } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
3378 FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
3379 FI = FINode->getIndex();
3380 Bytes = Flags.getByValSize();
3381 } else
3382 return false;
3384 assert(FI != INT_MAX);
3385 if (!MFI->isFixedObjectIndex(FI))
3386 return false;
3387 return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI);
3388 }
3390 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
3391 /// for tail call optimization. Targets which want to do tail call
3392 /// optimization should implement this function.
3393 bool
3394 X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
3395 CallingConv::ID CalleeCC,
3396 bool isVarArg,
3397 bool isCalleeStructRet,
3398 bool isCallerStructRet,
3399 Type *RetTy,
3400 const SmallVectorImpl<ISD::OutputArg> &Outs,
3401 const SmallVectorImpl<SDValue> &OutVals,
3402 const SmallVectorImpl<ISD::InputArg> &Ins,
3403 SelectionDAG &DAG) const {
3404 if (!IsTailCallConvention(CalleeCC) && !IsCCallConvention(CalleeCC))
3405 return false;
3407 // If -tailcallopt is specified, make fastcc functions tail-callable.
3408 const MachineFunction &MF = DAG.getMachineFunction();
3409 const Function *CallerF = MF.getFunction();
3411 // If the function return type is x86_fp80 and the callee return type is not,
3412 // then the FP_EXTEND of the call result is not a nop. It's not safe to
3413 // perform a tailcall optimization here.
3414 if (CallerF->getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty())
3415 return false;
3417 CallingConv::ID CallerCC = CallerF->getCallingConv();
3418 bool CCMatch = CallerCC == CalleeCC;
3419 bool IsCalleeWin64 = Subtarget->isCallingConvWin64(CalleeCC);
3420 bool IsCallerWin64 = Subtarget->isCallingConvWin64(CallerCC);
3422 if (DAG.getTarget().Options.GuaranteedTailCallOpt) {
3423 if (IsTailCallConvention(CalleeCC) && CCMatch)
3424 return true;
3425 return false;
3426 }
3428 // Look for obvious safe cases to perform tail call optimization that do not
3429 // require ABI changes. This is what gcc calls sibcall.
3431 // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
3432 // emit a special epilogue.
3433 const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
3434 DAG.getSubtarget().getRegisterInfo());
3435 if (RegInfo->needsStackRealignment(MF))
3436 return false;
3438 // Also avoid sibcall optimization if either caller or callee uses struct
3439 // return semantics.
3440 if (isCalleeStructRet || isCallerStructRet)
3441 return false;
3443 // An stdcall/thiscall caller is expected to clean up its arguments; the
3444 // callee isn't going to do that.
3445 // FIXME: this is more restrictive than needed. We could produce a tailcall
3446 // when the stack adjustment matches. For example, with a thiscall that takes
3447 // only one argument.
3448 if (!CCMatch && (CallerCC == CallingConv::X86_StdCall ||
3449 CallerCC == CallingConv::X86_ThisCall))
3450 return false;
3452 // Do not sibcall optimize vararg calls unless all arguments are passed via
3453 // registers.
3454 if (isVarArg && !Outs.empty()) {
3456 // Optimizing for varargs on Win64 is unlikely to be safe without
3457 // additional testing.
3458 if (IsCalleeWin64 || IsCallerWin64)
3459 return false;
3461 SmallVector<CCValAssign, 16> ArgLocs;
3462 CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), ArgLocs,
3463 *DAG.getContext());
3465 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
3466 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
3467 if (!ArgLocs[i].isRegLoc())
3468 return false;
3469 }
3471 // If the call result is in ST0 / ST1, it needs to be popped off the x87
3472 // stack. Therefore, if it's not used by the call it is not safe to optimize
3473 // this into a sibcall.
3474 bool Unused = false;
3475 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
3476 if (!Ins[i].Used) {
3477 Unused = true;
3478 break;
3479 }
3480 }
3481 if (Unused) {
3482 SmallVector<CCValAssign, 16> RVLocs;
3483 CCState CCInfo(CalleeCC, false, DAG.getMachineFunction(), RVLocs,
3484 *DAG.getContext());
3485 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
3486 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
3487 CCValAssign &VA = RVLocs[i];
3488 if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
3489 return false;
3490 }
3491 }
3493 // If the calling conventions do not match, then we'd better make sure the
3494 // results are returned in the same way as what the caller expects.
3495 if (!CCMatch) {
3496 SmallVector<CCValAssign, 16> RVLocs1;
3497 CCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(), RVLocs1,
3498 *DAG.getContext());
3499 CCInfo1.AnalyzeCallResult(Ins, RetCC_X86);
3501 SmallVector<CCValAssign, 16> RVLocs2;
3502 CCState CCInfo2(CallerCC, false, DAG.getMachineFunction(), RVLocs2,
3503 *DAG.getContext());
3504 CCInfo2.AnalyzeCallResult(Ins, RetCC_X86);
3506 if (RVLocs1.size() != RVLocs2.size())
3507 return false;
3508 for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
3509 if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
3510 return false;
3511 if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
3512 return false;
3513 if (RVLocs1[i].isRegLoc()) {
3514 if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
3515 return false;
3516 } else {
3517 if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())
3518 return false;
3519 }
3520 }
3521 }
3523 // If the callee takes no arguments then go on to check the results of the
3524 // call.
3525 if (!Outs.empty()) {
3526 // Check if stack adjustment is needed. For now, do not do this if any
3527 // argument is passed on the stack.
3528 SmallVector<CCValAssign, 16> ArgLocs;
3529 CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), ArgLocs,
3530 *DAG.getContext());
3532 // Allocate shadow area for Win64
3533 if (IsCalleeWin64)
3534 CCInfo.AllocateStack(32, 8);
3536 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
3537 if (CCInfo.getNextStackOffset()) {
3538 MachineFunction &MF = DAG.getMachineFunction();
3539 if (MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn())
3540 return false;
3542 // Check if the arguments are already laid out in the right way as
3543 // the caller's fixed stack objects.
3544 MachineFrameInfo *MFI = MF.getFrameInfo();
3545 const MachineRegisterInfo *MRI = &MF.getRegInfo();
3546 const X86InstrInfo *TII =
3547 static_cast<const X86InstrInfo *>(DAG.getSubtarget().getInstrInfo());
3548 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3549 CCValAssign &VA = ArgLocs[i];
3550 SDValue Arg = OutVals[i];
3551 ISD::ArgFlagsTy Flags = Outs[i].Flags;
3552 if (VA.getLocInfo() == CCValAssign::Indirect)
3553 return false;
3554 if (!VA.isRegLoc()) {
3555 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
3556 MFI, MRI, TII))
3557 return false;
3558 }
3559 }
3560 }
3562 // If the tailcall address may be in a register, then make sure it's
3563 // possible to register allocate for it. In 32-bit, the call address can
3564 // only target EAX, EDX, or ECX since the tail call must be scheduled after
3565 // callee-saved registers are restored. These happen to be the same
3566 // registers used to pass 'inreg' arguments so watch out for those.
3567 if (!Subtarget->is64Bit() &&
3568 ((!isa<GlobalAddressSDNode>(Callee) &&
3569 !isa<ExternalSymbolSDNode>(Callee)) ||
3570 DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
3571 unsigned NumInRegs = 0;
3572 // In PIC we need an extra register to formulate the address computation
3573 // for the callee.
3574 unsigned MaxInRegs =
3575 (DAG.getTarget().getRelocationModel() == Reloc::PIC_) ? 2 : 3;
3577 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3578 CCValAssign &VA = ArgLocs[i];
3579 if (!VA.isRegLoc())
3580 continue;
3581 unsigned Reg = VA.getLocReg();
3582 switch (Reg) {
3583 default: break;
3584 case X86::EAX: case X86::EDX: case X86::ECX:
3585 if (++NumInRegs == MaxInRegs)
3586 return false;
3587 break;
3588 }
3589 }
3590 }
3591 }
3593 return true;
3594 }
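// A minimal sketch of a call that passes the sibcall checks above
// (hypothetical functions; matching C calling conventions, no struct return,
// and no stack-passed arguments that would need to be shuffled):
//
//   declare i32 @callee(i32)
//   define i32 @caller(i32 %x) {
//     %r = tail call i32 @callee(i32 %x)
//     ret i32 %r
//   }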
3596 FastISel *
3597 X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
3598 const TargetLibraryInfo *libInfo) const {
3599 return X86::createFastISel(funcInfo, libInfo);
3600 }
3602 //===----------------------------------------------------------------------===//
3603 // Other Lowering Hooks
3604 //===----------------------------------------------------------------------===//
3606 static bool MayFoldLoad(SDValue Op) {
3607 return Op.hasOneUse() && ISD::isNormalLoad(Op.getNode());
3608 }
3610 static bool MayFoldIntoStore(SDValue Op) {
3611 return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());
3612 }
3614 static bool isTargetShuffle(unsigned Opcode) {
3615 switch(Opcode) {
3616 default: return false;
3617 case X86ISD::BLENDI:
3618 case X86ISD::PSHUFB:
3619 case X86ISD::PSHUFD:
3620 case X86ISD::PSHUFHW:
3621 case X86ISD::PSHUFLW:
3622 case X86ISD::SHUFP:
3623 case X86ISD::PALIGNR:
3624 case X86ISD::MOVLHPS:
3625 case X86ISD::MOVLHPD:
3626 case X86ISD::MOVHLPS:
3627 case X86ISD::MOVLPS:
3628 case X86ISD::MOVLPD:
3629 case X86ISD::MOVSHDUP:
3630 case X86ISD::MOVSLDUP:
3631 case X86ISD::MOVDDUP:
3632 case X86ISD::MOVSS:
3633 case X86ISD::MOVSD:
3634 case X86ISD::UNPCKL:
3635 case X86ISD::UNPCKH:
3636 case X86ISD::VPERMILPI:
3637 case X86ISD::VPERM2X128:
3638 case X86ISD::VPERMI:
3639 return true;
3640 }
3641 }
3643 static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT,
3644 SDValue V1, SelectionDAG &DAG) {
3645 switch(Opc) {
3646 default: llvm_unreachable("Unknown x86 shuffle node");
3647 case X86ISD::MOVSHDUP:
3648 case X86ISD::MOVSLDUP:
3649 case X86ISD::MOVDDUP:
3650 return DAG.getNode(Opc, dl, VT, V1);
3651 }
3652 }
3654 static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT,
3655 SDValue V1, unsigned TargetMask,
3656 SelectionDAG &DAG) {
3657 switch(Opc) {
3658 default: llvm_unreachable("Unknown x86 shuffle node");
3659 case X86ISD::PSHUFD:
3660 case X86ISD::PSHUFHW:
3661 case X86ISD::PSHUFLW:
3662 case X86ISD::VPERMILPI:
3663 case X86ISD::VPERMI:
3664 return DAG.getNode(Opc, dl, VT, V1, DAG.getConstant(TargetMask, MVT::i8));
3665 }
3666 }
3668 static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT,
3669 SDValue V1, SDValue V2, unsigned TargetMask,
3670 SelectionDAG &DAG) {
3671 switch(Opc) {
3672 default: llvm_unreachable("Unknown x86 shuffle node");
3673 case X86ISD::PALIGNR:
3674 case X86ISD::VALIGN:
3675 case X86ISD::SHUFP:
3676 case X86ISD::VPERM2X128:
3677 return DAG.getNode(Opc, dl, VT, V1, V2,
3678 DAG.getConstant(TargetMask, MVT::i8));
3679 }
3680 }
3682 static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT,
3683 SDValue V1, SDValue V2, SelectionDAG &DAG) {
3684 switch(Opc) {
3685 default: llvm_unreachable("Unknown x86 shuffle node");
3686 case X86ISD::MOVLHPS:
3687 case X86ISD::MOVLHPD:
3688 case X86ISD::MOVHLPS:
3689 case X86ISD::MOVLPS:
3690 case X86ISD::MOVLPD:
3691 case X86ISD::MOVSS:
3692 case X86ISD::MOVSD:
3693 case X86ISD::UNPCKL:
3694 case X86ISD::UNPCKH:
3695 return DAG.getNode(Opc, dl, VT, V1, V2);
3696 }
3697 }
3699 SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
3700 MachineFunction &MF = DAG.getMachineFunction();
3701 const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
3702 DAG.getSubtarget().getRegisterInfo());
3703 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
3704 int ReturnAddrIndex = FuncInfo->getRAIndex();
3706 if (ReturnAddrIndex == 0) {
3707 // Set up a frame object for the return address.
3708 unsigned SlotSize = RegInfo->getSlotSize();
3709 ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize,
3710 -(int64_t)SlotSize,
3711 false);
3712 FuncInfo->setRAIndex(ReturnAddrIndex);
3713 }
3715 return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy());
3716 }
3718 bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
3719 bool hasSymbolicDisplacement) {
3720 // Offset should fit into 32 bit immediate field.
3721 if (!isInt<32>(Offset))
3722 return false;
3724 // If we don't have a symbolic displacement - we don't have any extra
3725 // restrictions.
3726 if (!hasSymbolicDisplacement)
3727 return true;
3729 // FIXME: Some tweaks might be needed for medium code model.
3730 if (M != CodeModel::Small && M != CodeModel::Kernel)
3731 return false;
3733 // For the small code model, we assume that the latest object is 16MB before
3734 // the end of the 31-bit boundary. We may also accept pretty large negative
3735 // constants, knowing that all objects are in the positive half of the address space.
3736 if (M == CodeModel::Small && Offset < 16*1024*1024)
3737 return true;
3739 // For the kernel code model, we know that all objects reside in the negative
3740 // half of the 32-bit address space. We may not accept negative offsets, since
3741 // they may be just off the boundary, but we may accept pretty large positive ones.
3742 if (M == CodeModel::Kernel && Offset >= 0)
3743 return true;
3745 return false;
3746 }
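// For instance (illustrative values): with a symbolic displacement under the
// small code model, an offset of 8 MiB is accepted because it is less than
// the assumed 16MB margin before the 31-bit boundary, whereas under the
// kernel code model the same 8 MiB is accepted but any negative offset is
// rejected.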
3748 /// isCalleePop - Determines whether the callee is required to pop its
3749 /// own arguments. Callee pop is necessary to support tail calls.
3750 bool X86::isCalleePop(CallingConv::ID CallingConv,
3751 bool is64Bit, bool IsVarArg, bool TailCallOpt) {
3752 switch (CallingConv) {
3753 default:
3754 return false;
3755 case CallingConv::X86_StdCall:
3756 case CallingConv::X86_FastCall:
3757 case CallingConv::X86_ThisCall:
3758 return !is64Bit;
3759 case CallingConv::Fast:
3760 case CallingConv::GHC:
3761 case CallingConv::HiPE:
3762 if (IsVarArg)
3763 return false;
3764 return TailCallOpt;
3765 }
3766 }
3768 /// \brief Return true if the condition is an unsigned comparison operation.
3769 static bool isX86CCUnsigned(unsigned X86CC) {
3770 switch (X86CC) {
3771 default: llvm_unreachable("Invalid integer condition!");
3772 case X86::COND_E: return true;
3773 case X86::COND_G: return false;
3774 case X86::COND_GE: return false;
3775 case X86::COND_L: return false;
3776 case X86::COND_LE: return false;
3777 case X86::COND_NE: return true;
3778 case X86::COND_B: return true;
3779 case X86::COND_A: return true;
3780 case X86::COND_BE: return true;
3781 case X86::COND_AE: return true;