1 //===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the interfaces that X86 uses to lower LLVM code into a
11 // selection DAG.
12 //
13 //===----------------------------------------------------------------------===//
15 #include "X86ISelLowering.h"
16 #include "Utils/X86ShuffleDecode.h"
17 #include "X86CallingConv.h"
18 #include "X86InstrBuilder.h"
19 #include "X86MachineFunctionInfo.h"
20 #include "X86TargetMachine.h"
21 #include "X86TargetObjectFile.h"
22 #include "llvm/ADT/SmallBitVector.h"
23 #include "llvm/ADT/SmallSet.h"
24 #include "llvm/ADT/Statistic.h"
25 #include "llvm/ADT/StringExtras.h"
26 #include "llvm/ADT/StringSwitch.h"
27 #include "llvm/ADT/VariadicFunction.h"
28 #include "llvm/CodeGen/IntrinsicLowering.h"
29 #include "llvm/CodeGen/MachineFrameInfo.h"
30 #include "llvm/CodeGen/MachineFunction.h"
31 #include "llvm/CodeGen/MachineInstrBuilder.h"
32 #include "llvm/CodeGen/MachineJumpTableInfo.h"
33 #include "llvm/CodeGen/MachineModuleInfo.h"
34 #include "llvm/CodeGen/MachineRegisterInfo.h"
35 #include "llvm/IR/CallSite.h"
36 #include "llvm/IR/CallingConv.h"
37 #include "llvm/IR/Constants.h"
38 #include "llvm/IR/DerivedTypes.h"
39 #include "llvm/IR/Function.h"
40 #include "llvm/IR/GlobalAlias.h"
41 #include "llvm/IR/GlobalVariable.h"
42 #include "llvm/IR/Instructions.h"
43 #include "llvm/IR/Intrinsics.h"
44 #include "llvm/MC/MCAsmInfo.h"
45 #include "llvm/MC/MCContext.h"
46 #include "llvm/MC/MCExpr.h"
47 #include "llvm/MC/MCSymbol.h"
48 #include "llvm/Support/CommandLine.h"
49 #include "llvm/Support/Debug.h"
50 #include "llvm/Support/ErrorHandling.h"
51 #include "llvm/Support/MathExtras.h"
52 #include "llvm/Target/TargetOptions.h"
53 #include "X86IntrinsicsInfo.h"
54 #include <bitset>
55 #include <numeric>
56 #include <cctype>
57 using namespace llvm;
59 #define DEBUG_TYPE "x86-isel"
61 STATISTIC(NumTailCalls, "Number of tail calls");
63 static cl::opt<bool> ExperimentalVectorWideningLegalization(
64 "x86-experimental-vector-widening-legalization", cl::init(false),
65 cl::desc("Enable an experimental vector type legalization through widening "
66 "rather than promotion."),
67 cl::Hidden);
69 static cl::opt<bool> ExperimentalVectorShuffleLowering(
70 "x86-experimental-vector-shuffle-lowering", cl::init(true),
71 cl::desc("Enable an experimental vector shuffle lowering code path."),
72 cl::Hidden);
74 static cl::opt<bool> ExperimentalVectorShuffleLegality(
75 "x86-experimental-vector-shuffle-legality", cl::init(false),
76 cl::desc("Enable experimental shuffle legality based on the experimental "
77 "shuffle lowering. Should only be used with the experimental "
78 "shuffle lowering."),
79 cl::Hidden);
81 static cl::opt<int> ReciprocalEstimateRefinementSteps(
82 "x86-recip-refinement-steps", cl::init(1),
83 cl::desc("Specify the number of Newton-Raphson iterations applied to the "
84 "result of the hardware reciprocal estimate instruction."),
85 cl::NotHidden);
87 // Forward declarations.
88 static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1,
89 SDValue V2);
91 static SDValue ExtractSubVector(SDValue Vec, unsigned IdxVal,
92 SelectionDAG &DAG, SDLoc dl,
93 unsigned vectorWidth) {
94 assert((vectorWidth == 128 || vectorWidth == 256) &&
95 "Unsupported vector width");
96 EVT VT = Vec.getValueType();
97 EVT ElVT = VT.getVectorElementType();
98 unsigned Factor = VT.getSizeInBits()/vectorWidth;
99 EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
100 VT.getVectorNumElements()/Factor);
102 // Extract from UNDEF is UNDEF.
103 if (Vec.getOpcode() == ISD::UNDEF)
104 return DAG.getUNDEF(ResultVT);
106 // Extract the relevant vectorWidth bits. Generate an EXTRACT_SUBVECTOR
107 unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits();
109 // This is the index of the first element of the vectorWidth-bit chunk
110 // we want.
111 unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / vectorWidth)
112 * ElemsPerChunk);
114 // If the input is a buildvector just emit a smaller one.
115 if (Vec.getOpcode() == ISD::BUILD_VECTOR)
116 return DAG.getNode(ISD::BUILD_VECTOR, dl, ResultVT,
117 makeArrayRef(Vec->op_begin() + NormalizedIdxVal,
118 ElemsPerChunk));
120 SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal);
121 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx);
122 }
124 /// Generate a DAG to grab 128-bits from a vector > 128 bits. This
125 /// sets things up to match to an AVX VEXTRACTF128 / VEXTRACTI128
126 /// or AVX-512 VEXTRACTF32x4 / VEXTRACTI32x4
127 /// instructions or a simple subregister reference. Idx is an index in the
128 /// 128 bits we want. It need not be aligned to a 128-bit boundary. That makes
129 /// lowering EXTRACT_VECTOR_ELT operations easier.
130 static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal,
131 SelectionDAG &DAG, SDLoc dl) {
132 assert((Vec.getValueType().is256BitVector() ||
133 Vec.getValueType().is512BitVector()) && "Unexpected vector size!");
134 return ExtractSubVector(Vec, IdxVal, DAG, dl, 128);
135 }
137 /// Generate a DAG to grab 256-bits from a 512-bit vector.
138 static SDValue Extract256BitVector(SDValue Vec, unsigned IdxVal,
139 SelectionDAG &DAG, SDLoc dl) {
140 assert(Vec.getValueType().is512BitVector() && "Unexpected vector size!");
141 return ExtractSubVector(Vec, IdxVal, DAG, dl, 256);
142 }
144 static SDValue InsertSubVector(SDValue Result, SDValue Vec,
145 unsigned IdxVal, SelectionDAG &DAG,
146 SDLoc dl, unsigned vectorWidth) {
147 assert((vectorWidth == 128 || vectorWidth == 256) &&
148 "Unsupported vector width");
149 // Inserting UNDEF is Result
150 if (Vec.getOpcode() == ISD::UNDEF)
151 return Result;
152 EVT VT = Vec.getValueType();
153 EVT ElVT = VT.getVectorElementType();
154 EVT ResultVT = Result.getValueType();
156 // Insert the relevant vectorWidth bits.
157 unsigned ElemsPerChunk = vectorWidth/ElVT.getSizeInBits();
159 // This is the index of the first element of the vectorWidth-bit chunk
160 // we want.
161 unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits())/vectorWidth)
162 * ElemsPerChunk);
164 SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal);
165 return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, VecIdx);
166 }
168 /// Generate a DAG to put 128-bits into a vector > 128 bits. This
169 /// sets things up to match to an AVX VINSERTF128/VINSERTI128 or
170 /// AVX-512 VINSERTF32x4/VINSERTI32x4 instructions or a
171 /// simple superregister reference. Idx is an index in the 128 bits
172 /// we want. It need not be aligned to a 128-bit boundary. That makes
173 /// lowering INSERT_VECTOR_ELT operations easier.
174 static SDValue Insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
175 SelectionDAG &DAG,SDLoc dl) {
176 assert(Vec.getValueType().is128BitVector() && "Unexpected vector size!");
177 return InsertSubVector(Result, Vec, IdxVal, DAG, dl, 128);
178 }
180 static SDValue Insert256BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
181 SelectionDAG &DAG, SDLoc dl) {
182 assert(Vec.getValueType().is256BitVector() && "Unexpected vector size!");
183 return InsertSubVector(Result, Vec, IdxVal, DAG, dl, 256);
184 }
186 /// Concat two 128-bit vectors into a 256 bit vector using VINSERTF128
187 /// instructions. This is used because creating CONCAT_VECTOR nodes of
188 /// BUILD_VECTORS returns a larger BUILD_VECTOR while we're trying to lower
189 /// large BUILD_VECTORS.
190 static SDValue Concat128BitVectors(SDValue V1, SDValue V2, EVT VT,
191 unsigned NumElems, SelectionDAG &DAG,
192 SDLoc dl) {
193 SDValue V = Insert128BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl);
194 return Insert128BitVector(V, V2, NumElems/2, DAG, dl);
195 }
197 static SDValue Concat256BitVectors(SDValue V1, SDValue V2, EVT VT,
198 unsigned NumElems, SelectionDAG &DAG,
199 SDLoc dl) {
200 SDValue V = Insert256BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl);
201 return Insert256BitVector(V, V2, NumElems/2, DAG, dl);
202 }
204 // FIXME: This should stop caching the target machine as soon as
205 // we can remove resetOperationActions et al.
206 X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM)
207 : TargetLowering(TM) {
208 Subtarget = &TM.getSubtarget<X86Subtarget>();
209 X86ScalarSSEf64 = Subtarget->hasSSE2();
210 X86ScalarSSEf32 = Subtarget->hasSSE1();
211 TD = getDataLayout();
213 resetOperationActions();
214 }
216 void X86TargetLowering::resetOperationActions() {
217 const TargetMachine &TM = getTargetMachine();
218 static bool FirstTimeThrough = true;
220 // If none of the target options have changed, then we don't need to reset the
221 // operation actions.
222 if (!FirstTimeThrough && TO == TM.Options) return;
224 if (!FirstTimeThrough) {
225 // Reinitialize the actions.
226 initActions();
227 FirstTimeThrough = false;
228 }
230 TO = TM.Options;
232 // Set up the TargetLowering object.
233 static const MVT IntVTs[] = { MVT::i8, MVT::i16, MVT::i32, MVT::i64 };
235 // X86 is weird. It always uses i8 for shift amounts and setcc results.
236 setBooleanContents(ZeroOrOneBooleanContent);
237 // X86-SSE is even stranger. It uses -1 or 0 for vector masks.
238 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
240 // For 64-bit, since we have so many registers, use the ILP scheduler.
241 // For 32-bit, use the register pressure specific scheduling.
242 // For Atom, always use ILP scheduling.
243 if (Subtarget->isAtom())
244 setSchedulingPreference(Sched::ILP);
245 else if (Subtarget->is64Bit())
246 setSchedulingPreference(Sched::ILP);
247 else
248 setSchedulingPreference(Sched::RegPressure);
249 const X86RegisterInfo *RegInfo =
250 TM.getSubtarget<X86Subtarget>().getRegisterInfo();
251 setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
253 // Bypass expensive divides on Atom when compiling with O2.
254 if (TM.getOptLevel() >= CodeGenOpt::Default) {
255 if (Subtarget->hasSlowDivide32())
256 addBypassSlowDiv(32, 8);
257 if (Subtarget->hasSlowDivide64() && Subtarget->is64Bit())
258 addBypassSlowDiv(64, 16);
259 }
261 if (Subtarget->isTargetKnownWindowsMSVC()) {
262 // Setup Windows compiler runtime calls.
263 setLibcallName(RTLIB::SDIV_I64, "_alldiv");
264 setLibcallName(RTLIB::UDIV_I64, "_aulldiv");
265 setLibcallName(RTLIB::SREM_I64, "_allrem");
266 setLibcallName(RTLIB::UREM_I64, "_aullrem");
267 setLibcallName(RTLIB::MUL_I64, "_allmul");
268 setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall);
269 setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall);
270 setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall);
271 setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall);
272 setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall);
274 // The _ftol2 runtime function has an unusual calling conv, which
275 // is modeled by a special pseudo-instruction.
276 setLibcallName(RTLIB::FPTOUINT_F64_I64, nullptr);
277 setLibcallName(RTLIB::FPTOUINT_F32_I64, nullptr);
278 setLibcallName(RTLIB::FPTOUINT_F64_I32, nullptr);
279 setLibcallName(RTLIB::FPTOUINT_F32_I32, nullptr);
280 }
282 if (Subtarget->isTargetDarwin()) {
283 // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
284 setUseUnderscoreSetJmp(false);
285 setUseUnderscoreLongJmp(false);
286 } else if (Subtarget->isTargetWindowsGNU()) {
287 // MS runtime is weird: it exports _setjmp, but longjmp!
288 setUseUnderscoreSetJmp(true);
289 setUseUnderscoreLongJmp(false);
290 } else {
291 setUseUnderscoreSetJmp(true);
292 setUseUnderscoreLongJmp(true);
293 }
295 // Set up the register classes.
296 addRegisterClass(MVT::i8, &X86::GR8RegClass);
297 addRegisterClass(MVT::i16, &X86::GR16RegClass);
298 addRegisterClass(MVT::i32, &X86::GR32RegClass);
299 if (Subtarget->is64Bit())
300 addRegisterClass(MVT::i64, &X86::GR64RegClass);
302 for (MVT VT : MVT::integer_valuetypes())
303 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
305 // We don't accept any truncstore of integer registers.
306 setTruncStoreAction(MVT::i64, MVT::i32, Expand);
307 setTruncStoreAction(MVT::i64, MVT::i16, Expand);
308 setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
309 setTruncStoreAction(MVT::i32, MVT::i16, Expand);
310 setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
311 setTruncStoreAction(MVT::i16, MVT::i8, Expand);
313 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
315 // SETOEQ and SETUNE require checking two conditions.
316 setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
317 setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
318 setCondCodeAction(ISD::SETOEQ, MVT::f80, Expand);
319 setCondCodeAction(ISD::SETUNE, MVT::f32, Expand);
320 setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
321 setCondCodeAction(ISD::SETUNE, MVT::f80, Expand);
323 // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
324 // operation.
325 setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote);
326 setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote);
327 setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote);
329 if (Subtarget->is64Bit()) {
330 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
331 setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
332 } else if (!TM.Options.UseSoftFloat) {
333 // We have an algorithm for SSE2->double, and we turn this into a
334 // 64-bit FILD followed by conditional FADD for other targets.
335 setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
336 // We have an algorithm for SSE2, and we turn this into a 64-bit
337 // FILD for other targets.
338 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
339 }
341 // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
342 // this operation.
343 setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote);
344 setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote);
346 if (!TM.Options.UseSoftFloat) {
347 // SSE has no i16 to fp conversion, only i32
348 if (X86ScalarSSEf32) {
349 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
350 // f32 and f64 cases are Legal, f80 case is not
351 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
352 } else {
353 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Custom);
354 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
355 }
356 } else {
357 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
358 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Promote);
359 }
361 // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
362 // are Legal, f80 is custom lowered.
363 setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom);
364 setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom);
366 // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have
367 // this operation.
368 setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote);
369 setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote);
371 if (X86ScalarSSEf32) {
372 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
373 // f32 and f64 cases are Legal, f80 case is not
374 setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
375 } else {
376 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom);
377 setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
378 }
380 // Handle FP_TO_UINT by promoting the destination to a larger signed
381 // conversion.
382 setOperationAction(ISD::FP_TO_UINT , MVT::i1 , Promote);
383 setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote);
384 setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote);
386 if (Subtarget->is64Bit()) {
387 setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
388 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
389 } else if (!TM.Options.UseSoftFloat) {
390 // Since AVX is a superset of SSE3, only check for SSE here.
391 if (Subtarget->hasSSE1() && !Subtarget->hasSSE3())
392 // Expand FP_TO_UINT into a select.
393 // FIXME: We would like to use a Custom expander here eventually to do
394 // the optimal thing for SSE vs. the default expansion in the legalizer.
395 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand);
396 else
397 // With SSE3 we can use fisttpll to convert to a signed i64; without
398 // SSE, we're stuck with a fistpll.
399 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
400 }
402 if (isTargetFTOL()) {
403 // Use the _ftol2 runtime function, which has a pseudo-instruction
404 // to handle its weird calling convention.
405 setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom);
406 }
408 // TODO: when we have SSE, these could be more efficient, by using movd/movq.
409 if (!X86ScalarSSEf64) {
410 setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
411 setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
412 if (Subtarget->is64Bit()) {
413 setOperationAction(ISD::BITCAST , MVT::f64 , Expand);
414 // Without SSE, i64->f64 goes through memory.
415 setOperationAction(ISD::BITCAST , MVT::i64 , Expand);
416 }
417 }
419 // Scalar integer divide and remainder are lowered to use operations that
420 // produce two results, to match the available instructions. This exposes
421 // the two-result form to trivial CSE, which is able to combine x/y and x%y
422 // into a single instruction.
423 //
424 // Scalar integer multiply-high is also lowered to use two-result
425 // operations, to match the available instructions. However, plain multiply
426 // (low) operations are left as Legal, as there are single-result
427 // instructions for this in x86. Using the two-result multiply instructions
428 // when both high and low results are needed must be arranged by dagcombine.
429 for (unsigned i = 0; i != array_lengthof(IntVTs); ++i) {
430 MVT VT = IntVTs[i];
431 setOperationAction(ISD::MULHS, VT, Expand);
432 setOperationAction(ISD::MULHU, VT, Expand);
433 setOperationAction(ISD::SDIV, VT, Expand);
434 setOperationAction(ISD::UDIV, VT, Expand);
435 setOperationAction(ISD::SREM, VT, Expand);
436 setOperationAction(ISD::UREM, VT, Expand);
438 // Add/Sub overflow ops with MVT::Glues are lowered to EFLAGS dependences.
439 setOperationAction(ISD::ADDC, VT, Custom);
440 setOperationAction(ISD::ADDE, VT, Custom);
441 setOperationAction(ISD::SUBC, VT, Custom);
442 setOperationAction(ISD::SUBE, VT, Custom);
443 }
445 setOperationAction(ISD::BR_JT , MVT::Other, Expand);
446 setOperationAction(ISD::BRCOND , MVT::Other, Custom);
447 setOperationAction(ISD::BR_CC , MVT::f32, Expand);
448 setOperationAction(ISD::BR_CC , MVT::f64, Expand);
449 setOperationAction(ISD::BR_CC , MVT::f80, Expand);
450 setOperationAction(ISD::BR_CC , MVT::i8, Expand);
451 setOperationAction(ISD::BR_CC , MVT::i16, Expand);
452 setOperationAction(ISD::BR_CC , MVT::i32, Expand);
453 setOperationAction(ISD::BR_CC , MVT::i64, Expand);
454 setOperationAction(ISD::SELECT_CC , MVT::f32, Expand);
455 setOperationAction(ISD::SELECT_CC , MVT::f64, Expand);
456 setOperationAction(ISD::SELECT_CC , MVT::f80, Expand);
457 setOperationAction(ISD::SELECT_CC , MVT::i8, Expand);
458 setOperationAction(ISD::SELECT_CC , MVT::i16, Expand);
459 setOperationAction(ISD::SELECT_CC , MVT::i32, Expand);
460 setOperationAction(ISD::SELECT_CC , MVT::i64, Expand);
461 if (Subtarget->is64Bit())
462 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
463 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
464 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
465 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
466 setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand);
467 setOperationAction(ISD::FREM , MVT::f32 , Expand);
468 setOperationAction(ISD::FREM , MVT::f64 , Expand);
469 setOperationAction(ISD::FREM , MVT::f80 , Expand);
470 setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);
472 // Promote the i8 variants and force them on up to i32 which has a shorter
473 // encoding.
474 setOperationAction(ISD::CTTZ , MVT::i8 , Promote);
475 AddPromotedToType (ISD::CTTZ , MVT::i8 , MVT::i32);
476 setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i8 , Promote);
477 AddPromotedToType (ISD::CTTZ_ZERO_UNDEF , MVT::i8 , MVT::i32);
478 if (Subtarget->hasBMI()) {
479 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16 , Expand);
480 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Expand);
481 if (Subtarget->is64Bit())
482 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
483 } else {
484 setOperationAction(ISD::CTTZ , MVT::i16 , Custom);
485 setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
486 if (Subtarget->is64Bit())
487 setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
488 }
490 if (Subtarget->hasLZCNT()) {
491 // When promoting the i8 variants, force them to i32 for a shorter
492 // encoding.
493 setOperationAction(ISD::CTLZ , MVT::i8 , Promote);
494 AddPromotedToType (ISD::CTLZ , MVT::i8 , MVT::i32);
495 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , Promote);
496 AddPromotedToType (ISD::CTLZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
497 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16 , Expand);
498 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32 , Expand);
499 if (Subtarget->is64Bit())
500 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
501 } else {
502 setOperationAction(ISD::CTLZ , MVT::i8 , Custom);
503 setOperationAction(ISD::CTLZ , MVT::i16 , Custom);
504 setOperationAction(ISD::CTLZ , MVT::i32 , Custom);
505 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , Custom);
506 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16 , Custom);
507 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32 , Custom);
508 if (Subtarget->is64Bit()) {
509 setOperationAction(ISD::CTLZ , MVT::i64 , Custom);
510 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
511 }
512 }
514 // Special handling for half-precision floating point conversions.
515 // If we don't have F16C support, then lower half float conversions
516 // into library calls.
517 if (TM.Options.UseSoftFloat || !Subtarget->hasF16C()) {
518 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
519 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
520 }
522 // There's never any support for operations beyond MVT::f32.
523 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
524 setOperationAction(ISD::FP16_TO_FP, MVT::f80, Expand);
525 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
526 setOperationAction(ISD::FP_TO_FP16, MVT::f80, Expand);
528 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
529 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
530 setLoadExtAction(ISD::EXTLOAD, MVT::f80, MVT::f16, Expand);
531 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
532 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
533 setTruncStoreAction(MVT::f80, MVT::f16, Expand);
535 if (Subtarget->hasPOPCNT()) {
536 setOperationAction(ISD::CTPOP , MVT::i8 , Promote);
537 } else {
538 setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
539 setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
540 setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
541 if (Subtarget->is64Bit())
542 setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
543 }
545 setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
547 if (!Subtarget->hasMOVBE())
548 setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
550 // These should be promoted to a larger select which is supported.
551 setOperationAction(ISD::SELECT , MVT::i1 , Promote);
552 // X86 wants to expand cmov itself.
553 setOperationAction(ISD::SELECT , MVT::i8 , Custom);
554 setOperationAction(ISD::SELECT , MVT::i16 , Custom);
555 setOperationAction(ISD::SELECT , MVT::i32 , Custom);
556 setOperationAction(ISD::SELECT , MVT::f32 , Custom);
557 setOperationAction(ISD::SELECT , MVT::f64 , Custom);
558 setOperationAction(ISD::SELECT , MVT::f80 , Custom);
559 setOperationAction(ISD::SETCC , MVT::i8 , Custom);
560 setOperationAction(ISD::SETCC , MVT::i16 , Custom);
561 setOperationAction(ISD::SETCC , MVT::i32 , Custom);
562 setOperationAction(ISD::SETCC , MVT::f32 , Custom);
563 setOperationAction(ISD::SETCC , MVT::f64 , Custom);
564 setOperationAction(ISD::SETCC , MVT::f80 , Custom);
565 if (Subtarget->is64Bit()) {
566 setOperationAction(ISD::SELECT , MVT::i64 , Custom);
567 setOperationAction(ISD::SETCC , MVT::i64 , Custom);
568 }
569 setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
570 // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
571 // SjLj exception handling but a light-weight setjmp/longjmp replacement to
572 // support continuation, user-level threading, and etc.. As a result, no
573 // other SjLj exception interfaces are implemented and please don't build
574 // your own exception handling based on them.
575 // LLVM/Clang supports zero-cost DWARF exception handling.
576 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
577 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
579 // Darwin ABI issue.
580 setOperationAction(ISD::ConstantPool , MVT::i32 , Custom);
581 setOperationAction(ISD::JumpTable , MVT::i32 , Custom);
582 setOperationAction(ISD::GlobalAddress , MVT::i32 , Custom);
583 setOperationAction(ISD::GlobalTLSAddress, MVT::i32 , Custom);
584 if (Subtarget->is64Bit())
585 setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
586 setOperationAction(ISD::ExternalSymbol , MVT::i32 , Custom);
587 setOperationAction(ISD::BlockAddress , MVT::i32 , Custom);
588 if (Subtarget->is64Bit()) {
589 setOperationAction(ISD::ConstantPool , MVT::i64 , Custom);
590 setOperationAction(ISD::JumpTable , MVT::i64 , Custom);
591 setOperationAction(ISD::GlobalAddress , MVT::i64 , Custom);
592 setOperationAction(ISD::ExternalSymbol, MVT::i64 , Custom);
593 setOperationAction(ISD::BlockAddress , MVT::i64 , Custom);
594 }
595 // 64-bit addm sub, shl, sra, srl (iff 32-bit x86)
596 setOperationAction(ISD::SHL_PARTS , MVT::i32 , Custom);
597 setOperationAction(ISD::SRA_PARTS , MVT::i32 , Custom);
598 setOperationAction(ISD::SRL_PARTS , MVT::i32 , Custom);
599 if (Subtarget->is64Bit()) {
600 setOperationAction(ISD::SHL_PARTS , MVT::i64 , Custom);
601 setOperationAction(ISD::SRA_PARTS , MVT::i64 , Custom);
602 setOperationAction(ISD::SRL_PARTS , MVT::i64 , Custom);
603 }
605 if (Subtarget->hasSSE1())
606 setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
608 setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom);
610 // Expand certain atomics
611 for (unsigned i = 0; i != array_lengthof(IntVTs); ++i) {
612 MVT VT = IntVTs[i];
613 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
614 setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
615 setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
616 }
618 if (Subtarget->hasCmpxchg16b()) {
619 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
620 }
622 // FIXME - use subtarget debug flags
623 if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetELF() &&
624 !Subtarget->isTargetCygMing() && !Subtarget->isTargetWin64()) {
625 setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
626 }
628 if (Subtarget->is64Bit()) {
629 setExceptionPointerRegister(X86::RAX);
630 setExceptionSelectorRegister(X86::RDX);
631 } else {
632 setExceptionPointerRegister(X86::EAX);
633 setExceptionSelectorRegister(X86::EDX);
634 }
635 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
636 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);
638 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
639 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
641 setOperationAction(ISD::TRAP, MVT::Other, Legal);
642 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
644 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
645 setOperationAction(ISD::VASTART , MVT::Other, Custom);
646 setOperationAction(ISD::VAEND , MVT::Other, Expand);
647 if (Subtarget->is64Bit() && !Subtarget->isTargetWin64()) {
648 // TargetInfo::X86_64ABIBuiltinVaList
649 setOperationAction(ISD::VAARG , MVT::Other, Custom);
650 setOperationAction(ISD::VACOPY , MVT::Other, Custom);
651 } else {
652 // TargetInfo::CharPtrBuiltinVaList
653 setOperationAction(ISD::VAARG , MVT::Other, Expand);
654 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
655 }
657 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
658 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
660 setOperationAction(ISD::DYNAMIC_STACKALLOC, getPointerTy(), Custom);
662 if (!TM.Options.UseSoftFloat && X86ScalarSSEf64) {
663 // f32 and f64 use SSE.
664 // Set up the FP register classes.
665 addRegisterClass(MVT::f32, &X86::FR32RegClass);
666 addRegisterClass(MVT::f64, &X86::FR64RegClass);
668 // Use ANDPD to simulate FABS.
669 setOperationAction(ISD::FABS , MVT::f64, Custom);
670 setOperationAction(ISD::FABS , MVT::f32, Custom);
672 // Use XORP to simulate FNEG.
673 setOperationAction(ISD::FNEG , MVT::f64, Custom);
674 setOperationAction(ISD::FNEG , MVT::f32, Custom);
676 // Use ANDPD and ORPD to simulate FCOPYSIGN.
677 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
678 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
680 // Lower this to FGETSIGNx86 plus an AND.
681 setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
682 setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
684 // We don't support sin/cos/fmod
685 setOperationAction(ISD::FSIN , MVT::f64, Expand);
686 setOperationAction(ISD::FCOS , MVT::f64, Expand);
687 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
688 setOperationAction(ISD::FSIN , MVT::f32, Expand);
689 setOperationAction(ISD::FCOS , MVT::f32, Expand);
690 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
692 // Expand FP immediates into loads from the stack, except for the special
693 // cases we handle.
694 addLegalFPImmediate(APFloat(+0.0)); // xorpd
695 addLegalFPImmediate(APFloat(+0.0f)); // xorps
696 } else if (!TM.Options.UseSoftFloat && X86ScalarSSEf32) {
697 // Use SSE for f32, x87 for f64.
698 // Set up the FP register classes.
699 addRegisterClass(MVT::f32, &X86::FR32RegClass);
700 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
702 // Use ANDPS to simulate FABS.
703 setOperationAction(ISD::FABS , MVT::f32, Custom);
705 // Use XORP to simulate FNEG.
706 setOperationAction(ISD::FNEG , MVT::f32, Custom);
708 setOperationAction(ISD::UNDEF, MVT::f64, Expand);
710 // Use ANDPS and ORPS to simulate FCOPYSIGN.
711 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
712 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
714 // We don't support sin/cos/fmod
715 setOperationAction(ISD::FSIN , MVT::f32, Expand);
716 setOperationAction(ISD::FCOS , MVT::f32, Expand);
717 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
719 // Special cases we handle for FP constants.
720 addLegalFPImmediate(APFloat(+0.0f)); // xorps
721 addLegalFPImmediate(APFloat(+0.0)); // FLD0
722 addLegalFPImmediate(APFloat(+1.0)); // FLD1
723 addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
724 addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
726 if (!TM.Options.UnsafeFPMath) {
727 setOperationAction(ISD::FSIN , MVT::f64, Expand);
728 setOperationAction(ISD::FCOS , MVT::f64, Expand);
729 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
730 }
731 } else if (!TM.Options.UseSoftFloat) {
732 // f32 and f64 in x87.
733 // Set up the FP register classes.
734 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
735 addRegisterClass(MVT::f32, &X86::RFP32RegClass);
737 setOperationAction(ISD::UNDEF, MVT::f64, Expand);
738 setOperationAction(ISD::UNDEF, MVT::f32, Expand);
739 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
740 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
742 if (!TM.Options.UnsafeFPMath) {
743 setOperationAction(ISD::FSIN , MVT::f64, Expand);
744 setOperationAction(ISD::FSIN , MVT::f32, Expand);
745 setOperationAction(ISD::FCOS , MVT::f64, Expand);
746 setOperationAction(ISD::FCOS , MVT::f32, Expand);
747 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
748 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
749 }
750 addLegalFPImmediate(APFloat(+0.0)); // FLD0
751 addLegalFPImmediate(APFloat(+1.0)); // FLD1
752 addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
753 addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
754 addLegalFPImmediate(APFloat(+0.0f)); // FLD0
755 addLegalFPImmediate(APFloat(+1.0f)); // FLD1
756 addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
757 addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
758 }
760 // We don't support FMA.
761 setOperationAction(ISD::FMA, MVT::f64, Expand);
762 setOperationAction(ISD::FMA, MVT::f32, Expand);
764 // Long double always uses X87.
765 if (!TM.Options.UseSoftFloat) {
766 addRegisterClass(MVT::f80, &X86::RFP80RegClass);
767 setOperationAction(ISD::UNDEF, MVT::f80, Expand);
768 setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
769 {
770 APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended);
771 addLegalFPImmediate(TmpFlt); // FLD0
772 TmpFlt.changeSign();
773 addLegalFPImmediate(TmpFlt); // FLD0/FCHS
775 bool ignored;
776 APFloat TmpFlt2(+1.0);
777 TmpFlt2.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven,
778 &ignored);
779 addLegalFPImmediate(TmpFlt2); // FLD1
780 TmpFlt2.changeSign();
781 addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
782 }
784 if (!TM.Options.UnsafeFPMath) {
785 setOperationAction(ISD::FSIN , MVT::f80, Expand);
786 setOperationAction(ISD::FCOS , MVT::f80, Expand);
787 setOperationAction(ISD::FSINCOS, MVT::f80, Expand);
788 }
790 setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
791 setOperationAction(ISD::FCEIL, MVT::f80, Expand);
792 setOperationAction(ISD::FTRUNC, MVT::f80, Expand);
793 setOperationAction(ISD::FRINT, MVT::f80, Expand);
794 setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
795 setOperationAction(ISD::FMA, MVT::f80, Expand);
796 }
798 // Always use a library call for pow.
799 setOperationAction(ISD::FPOW , MVT::f32 , Expand);
800 setOperationAction(ISD::FPOW , MVT::f64 , Expand);
801 setOperationAction(ISD::FPOW , MVT::f80 , Expand);
803 setOperationAction(ISD::FLOG, MVT::f80, Expand);
804 setOperationAction(ISD::FLOG2, MVT::f80, Expand);
805 setOperationAction(ISD::FLOG10, MVT::f80, Expand);
806 setOperationAction(ISD::FEXP, MVT::f80, Expand);
807 setOperationAction(ISD::FEXP2, MVT::f80, Expand);
808 setOperationAction(ISD::FMINNUM, MVT::f80, Expand);
809 setOperationAction(ISD::FMAXNUM, MVT::f80, Expand);
811 // First set operation action for all vector types to either promote
812 // (for widening) or expand (for scalarization). Then we will selectively
813 // turn on ones that can be effectively codegen'd.
814 for (MVT VT : MVT::vector_valuetypes()) {
815 setOperationAction(ISD::ADD , VT, Expand);
816 setOperationAction(ISD::SUB , VT, Expand);
817 setOperationAction(ISD::FADD, VT, Expand);
818 setOperationAction(ISD::FNEG, VT, Expand);
819 setOperationAction(ISD::FSUB, VT, Expand);
820 setOperationAction(ISD::MUL , VT, Expand);
821 setOperationAction(ISD::FMUL, VT, Expand);
822 setOperationAction(ISD::SDIV, VT, Expand);
823 setOperationAction(ISD::UDIV, VT, Expand);
824 setOperationAction(ISD::FDIV, VT, Expand);
825 setOperationAction(ISD::SREM, VT, Expand);
826 setOperationAction(ISD::UREM, VT, Expand);
827 setOperationAction(ISD::LOAD, VT, Expand);
828 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
829 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT,Expand);
830 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
831 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT,Expand);
832 setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand);
833 setOperationAction(ISD::FABS, VT, Expand);
834 setOperationAction(ISD::FSIN, VT, Expand);
835 setOperationAction(ISD::FSINCOS, VT, Expand);
836 setOperationAction(ISD::FCOS, VT, Expand);
837 setOperationAction(ISD::FSINCOS, VT, Expand);
838 setOperationAction(ISD::FREM, VT, Expand);
839 setOperationAction(ISD::FMA, VT, Expand);
840 setOperationAction(ISD::FPOWI, VT, Expand);
841 setOperationAction(ISD::FSQRT, VT, Expand);
842 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
843 setOperationAction(ISD::FFLOOR, VT, Expand);
844 setOperationAction(ISD::FCEIL, VT, Expand);
845 setOperationAction(ISD::FTRUNC, VT, Expand);
846 setOperationAction(ISD::FRINT, VT, Expand);
847 setOperationAction(ISD::FNEARBYINT, VT, Expand);
848 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
849 setOperationAction(ISD::MULHS, VT, Expand);
850 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
851 setOperationAction(ISD::MULHU, VT, Expand);
852 setOperationAction(ISD::SDIVREM, VT, Expand);
853 setOperationAction(ISD::UDIVREM, VT, Expand);
854 setOperationAction(ISD::FPOW, VT, Expand);
855 setOperationAction(ISD::CTPOP, VT, Expand);
856 setOperationAction(ISD::CTTZ, VT, Expand);
857 setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
858 setOperationAction(ISD::CTLZ, VT, Expand);
859 setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
860 setOperationAction(ISD::SHL, VT, Expand);
861 setOperationAction(ISD::SRA, VT, Expand);
862 setOperationAction(ISD::SRL, VT, Expand);
863 setOperationAction(ISD::ROTL, VT, Expand);
864 setOperationAction(ISD::ROTR, VT, Expand);
865 setOperationAction(ISD::BSWAP, VT, Expand);
866 setOperationAction(ISD::SETCC, VT, Expand);
867 setOperationAction(ISD::FLOG, VT, Expand);
868 setOperationAction(ISD::FLOG2, VT, Expand);
869 setOperationAction(ISD::FLOG10, VT, Expand);
870 setOperationAction(ISD::FEXP, VT, Expand);
871 setOperationAction(ISD::FEXP2, VT, Expand);
872 setOperationAction(ISD::FP_TO_UINT, VT, Expand);
873 setOperationAction(ISD::FP_TO_SINT, VT, Expand);
874 setOperationAction(ISD::UINT_TO_FP, VT, Expand);
875 setOperationAction(ISD::SINT_TO_FP, VT, Expand);
876 setOperationAction(ISD::SIGN_EXTEND_INREG, VT,Expand);
877 setOperationAction(ISD::TRUNCATE, VT, Expand);
878 setOperationAction(ISD::SIGN_EXTEND, VT, Expand);
879 setOperationAction(ISD::ZERO_EXTEND, VT, Expand);
880 setOperationAction(ISD::ANY_EXTEND, VT, Expand);
881 setOperationAction(ISD::VSELECT, VT, Expand);
882 setOperationAction(ISD::SELECT_CC, VT, Expand);
883 for (MVT InnerVT : MVT::vector_valuetypes()) {
884 setTruncStoreAction(InnerVT, VT, Expand);
886 setLoadExtAction(ISD::SEXTLOAD, InnerVT, VT, Expand);
887 setLoadExtAction(ISD::ZEXTLOAD, InnerVT, VT, Expand);
889 // N.b. ISD::EXTLOAD legality is basically ignored except for i1-like
890 // types, we have to deal with them whether we ask for Expansion or not.
891 // Setting Expand causes its own optimisation problems though, so leave
892 // them legal.
893 if (VT.getVectorElementType() == MVT::i1)
894 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
895 }
896 }
898 // FIXME: In order to prevent SSE instructions being expanded to MMX ones
899 // with -msoft-float, disable use of MMX as well.
900 if (!TM.Options.UseSoftFloat && Subtarget->hasMMX()) {
901 addRegisterClass(MVT::x86mmx, &X86::VR64RegClass);
902 // No operations on x86mmx supported, everything uses intrinsics.
903 }
905 // MMX-sized vectors (other than x86mmx) are expected to be expanded
906 // into smaller operations.
907 setOperationAction(ISD::MULHS, MVT::v8i8, Expand);
908 setOperationAction(ISD::MULHS, MVT::v4i16, Expand);
909 setOperationAction(ISD::MULHS, MVT::v2i32, Expand);
910 setOperationAction(ISD::MULHS, MVT::v1i64, Expand);
911 setOperationAction(ISD::AND, MVT::v8i8, Expand);
912 setOperationAction(ISD::AND, MVT::v4i16, Expand);
913 setOperationAction(ISD::AND, MVT::v2i32, Expand);
914 setOperationAction(ISD::AND, MVT::v1i64, Expand);
915 setOperationAction(ISD::OR, MVT::v8i8, Expand);
916 setOperationAction(ISD::OR, MVT::v4i16, Expand);
917 setOperationAction(ISD::OR, MVT::v2i32, Expand);
918 setOperationAction(ISD::OR, MVT::v1i64, Expand);
919 setOperationAction(ISD::XOR, MVT::v8i8, Expand);
920 setOperationAction(ISD::XOR, MVT::v4i16, Expand);
921 setOperationAction(ISD::XOR, MVT::v2i32, Expand);
922 setOperationAction(ISD::XOR, MVT::v1i64, Expand);
923 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8, Expand);
924 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Expand);
925 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i32, Expand);
926 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Expand);
927 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v1i64, Expand);
928 setOperationAction(ISD::SELECT, MVT::v8i8, Expand);
929 setOperationAction(ISD::SELECT, MVT::v4i16, Expand);
930 setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
931 setOperationAction(ISD::SELECT, MVT::v1i64, Expand);
932 setOperationAction(ISD::BITCAST, MVT::v8i8, Expand);
933 setOperationAction(ISD::BITCAST, MVT::v4i16, Expand);
934 setOperationAction(ISD::BITCAST, MVT::v2i32, Expand);
935 setOperationAction(ISD::BITCAST, MVT::v1i64, Expand);
937 if (!TM.Options.UseSoftFloat && Subtarget->hasSSE1()) {
938 addRegisterClass(MVT::v4f32, &X86::VR128RegClass);
940 setOperationAction(ISD::FADD, MVT::v4f32, Legal);
941 setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
942 setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
943 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
944 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
945 setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
946 setOperationAction(ISD::FABS, MVT::v4f32, Custom);
947 setOperationAction(ISD::LOAD, MVT::v4f32, Legal);
948 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
949 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
950 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
951 setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
952 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
953 }
955 if (!TM.Options.UseSoftFloat && Subtarget->hasSSE2()) {
956 addRegisterClass(MVT::v2f64, &X86::VR128RegClass);
958 // FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM
959 // registers cannot be used even for integer operations.
960 addRegisterClass(MVT::v16i8, &X86::VR128RegClass);
961 addRegisterClass(MVT::v8i16, &X86::VR128RegClass);
962 addRegisterClass(MVT::v4i32, &X86::VR128RegClass);
963 addRegisterClass(MVT::v2i64, &X86::VR128RegClass);
965 setOperationAction(ISD::ADD, MVT::v16i8, Legal);
966 setOperationAction(ISD::ADD, MVT::v8i16, Legal);
967 setOperationAction(ISD::ADD, MVT::v4i32, Legal);
968 setOperationAction(ISD::ADD, MVT::v2i64, Legal);
969 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
970 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
971 setOperationAction(ISD::UMUL_LOHI, MVT::v4i32, Custom);
972 setOperationAction(ISD::SMUL_LOHI, MVT::v4i32, Custom);
973 setOperationAction(ISD::MULHU, MVT::v8i16, Legal);
974 setOperationAction(ISD::MULHS, MVT::v8i16, Legal);
975 setOperationAction(ISD::SUB, MVT::v16i8, Legal);
976 setOperationAction(ISD::SUB, MVT::v8i16, Legal);
977 setOperationAction(ISD::SUB, MVT::v4i32, Legal);
978 setOperationAction(ISD::SUB, MVT::v2i64, Legal);
979 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
980 setOperationAction(ISD::FADD, MVT::v2f64, Legal);
981 setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
982 setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
983 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
984 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
985 setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
986 setOperationAction(ISD::FABS, MVT::v2f64, Custom);
988 setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
989 setOperationAction(ISD::SETCC, MVT::v16i8, Custom);
990 setOperationAction(ISD::SETCC, MVT::v8i16, Custom);
991 setOperationAction(ISD::SETCC, MVT::v4i32, Custom);
993 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom);
994 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom);
995 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
996 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
997 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
999 // Only provide customized ctpop vector bit twiddling for vector types we
1000 // know to perform better than using the popcnt instructions on each vector
1001 // element. If popcnt isn't supported, always provide the custom version.
1002 if (!Subtarget->hasPOPCNT()) {
1003 setOperationAction(ISD::CTPOP, MVT::v4i32, Custom);
1004 setOperationAction(ISD::CTPOP, MVT::v2i64, Custom);
1005 }
1007 // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
1008 for (int i = MVT::v16i8; i != MVT::v2i64; ++i) {
1009 MVT VT = (MVT::SimpleValueType)i;
1010 // Do not attempt to custom lower non-power-of-2 vectors
1011 if (!isPowerOf2_32(VT.getVectorNumElements()))
1012 continue;
1013 // Do not attempt to custom lower non-128-bit vectors
1014 if (!VT.is128BitVector())
1015 continue;
1016 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1017 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1018 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1019 }
1021 // We support custom legalizing of sext and anyext loads for specific
1022 // memory vector types which we can load as a scalar (or sequence of
1023 // scalars) and extend in-register to a legal 128-bit vector type. For sext
1024 // loads these must work with a single scalar load.
1025 for (MVT VT : MVT::integer_vector_valuetypes()) {
1026 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i8, Custom);
1027 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i16, Custom);
1028 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v8i8, Custom);
1029 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i8, Custom);
1030 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i16, Custom);
1031 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i32, Custom);
1032 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i8, Custom);
1033 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i16, Custom);
1034 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8i8, Custom);
1035 }
1037 setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
1038 setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
1039 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
1040 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
1041 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
1042 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
1044 if (Subtarget->is64Bit()) {
1045 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Custom);
1046 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
1047 }
1049 // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
1050 for (int i = MVT::v16i8; i != MVT::v2i64; ++i) {
1051 MVT VT = (MVT::SimpleValueType)i;
1053 // Do not attempt to promote non-128-bit vectors
1054 if (!VT.is128BitVector())
1055 continue;
1057 setOperationAction(ISD::AND, VT, Promote);
1058 AddPromotedToType (ISD::AND, VT, MVT::v2i64);
1059 setOperationAction(ISD::OR, VT, Promote);
1060 AddPromotedToType (ISD::OR, VT, MVT::v2i64);
1061 setOperationAction(ISD::XOR, VT, Promote);
1062 AddPromotedToType (ISD::XOR, VT, MVT::v2i64);
1063 setOperationAction(ISD::LOAD, VT, Promote);
1064 AddPromotedToType (ISD::LOAD, VT, MVT::v2i64);
1065 setOperationAction(ISD::SELECT, VT, Promote);
1066 AddPromotedToType (ISD::SELECT, VT, MVT::v2i64);
1067 }
1069 // Custom lower v2i64 and v2f64 selects.
1070 setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
1071 setOperationAction(ISD::LOAD, MVT::v2i64, Legal);
1072 setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
1073 setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
1075 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
1076 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
1078 setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);
1079 setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
1080 // As there is no 64-bit GPR available, we need build a special custom
1081 // sequence to convert from v2i32 to v2f32.
1082 if (!Subtarget->is64Bit())
1083 setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);
1085 setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
1086 setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);
1088 for (MVT VT : MVT::fp_vector_valuetypes())
1089 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2f32, Legal);
1091 setOperationAction(ISD::BITCAST, MVT::v2i32, Custom);
1092 setOperationAction(ISD::BITCAST, MVT::v4i16, Custom);
1093 setOperationAction(ISD::BITCAST, MVT::v8i8, Custom);
1094 }
1096 if (!TM.Options.UseSoftFloat && Subtarget->hasSSE41()) {
1097 setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
1098 setOperationAction(ISD::FCEIL, MVT::f32, Legal);
1099 setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
1100 setOperationAction(ISD::FRINT, MVT::f32, Legal);
1101 setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
1102 setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
1103 setOperationAction(ISD::FCEIL, MVT::f64, Legal);
1104 setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
1105 setOperationAction(ISD::FRINT, MVT::f64, Legal);
1106 setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
1108 setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
1109 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
1110 setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
1111 setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
1112 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
1113 setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
1114 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
1115 setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
1116 setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
1117 setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
1119 // FIXME: Do we need to handle scalar-to-vector here?
1120 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
1122 setOperationAction(ISD::VSELECT, MVT::v2f64, Custom);
1123 setOperationAction(ISD::VSELECT, MVT::v2i64, Custom);
1124 setOperationAction(ISD::VSELECT, MVT::v4i32, Custom);
1125 setOperationAction(ISD::VSELECT, MVT::v4f32, Custom);
1126 setOperationAction(ISD::VSELECT, MVT::v8i16, Custom);
1127 // There is no BLENDI for byte vectors. We don't need to custom lower
1128 // some vselects for now.
1129 setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
1131 // SSE41 brings specific instructions for doing vector sign extend even in
1132 // cases where we don't have SRA.
1133 for (MVT VT : MVT::integer_vector_valuetypes()) {
1134 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Custom);
1135 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Custom);
1136 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Custom);
1137 }
1139 // SSE41 also has vector sign/zero extending loads, PMOV[SZ]X
1140 setLoadExtAction(ISD::SEXTLOAD, MVT::v8i16, MVT::v8i8, Legal);
1141 setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i8, Legal);
1142 setLoadExtAction(ISD::SEXTLOAD, MVT::v2i64, MVT::v2i8, Legal);
1143 setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i16, Legal);
1144 setLoadExtAction(ISD::SEXTLOAD, MVT::v2i64, MVT::v2i16, Legal);
1145 setLoadExtAction(ISD::SEXTLOAD, MVT::v2i64, MVT::v2i32, Legal);
1147 setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i16, MVT::v8i8, Legal);
1148 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i8, Legal);
1149 setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i64, MVT::v2i8, Legal);
1150 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i16, Legal);
1151 setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i64, MVT::v2i16, Legal);
1152 setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i64, MVT::v2i32, Legal);
1154 // i8 and i16 vectors are custom because the source register and source
1155 // source memory operand types are not the same width. f32 vectors are
1156 // custom since the immediate controlling the insert encodes additional
1157 // information.
1158 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
1159 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
1160 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
1161 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
1163 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Custom);
1164 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Custom);
1165 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
1166 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
1168 // FIXME: these should be Legal, but that's only for the case where
1169 // the index is constant. For now custom expand to deal with that.
1170 if (Subtarget->is64Bit()) {
1171 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Custom);
1172 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
1173 }
1174 }
1176 if (Subtarget->hasSSE2()) {
1177 setOperationAction(ISD::SRL, MVT::v8i16, Custom);
1178 setOperationAction(ISD::SRL, MVT::v16i8, Custom);
1180 setOperationAction(ISD::SHL, MVT::v8i16, Custom);
1181 setOperationAction(ISD::SHL, MVT::v16i8, Custom);
1183 setOperationAction(ISD::SRA, MVT::v8i16, Custom);
1184 setOperationAction(ISD::SRA, MVT::v16i8, Custom);
1186 // In the customized shift lowering, the legal cases in AVX2 will be
1187 // recognized.
1188 setOperationAction(ISD::SRL, MVT::v2i64, Custom);
1189 setOperationAction(ISD::SRL, MVT::v4i32, Custom);
1191 setOperationAction(ISD::SHL, MVT::v2i64, Custom);
1192 setOperationAction(ISD::SHL, MVT::v4i32, Custom);
1194 setOperationAction(ISD::SRA, MVT::v4i32, Custom);
1195 }
1197 if (!TM.Options.UseSoftFloat && Subtarget->hasFp256()) {
1198 addRegisterClass(MVT::v32i8, &X86::VR256RegClass);
1199 addRegisterClass(MVT::v16i16, &X86::VR256RegClass);
1200 addRegisterClass(MVT::v8i32, &X86::VR256RegClass);
1201 addRegisterClass(MVT::v8f32, &X86::VR256RegClass);
1202 addRegisterClass(MVT::v4i64, &X86::VR256RegClass);
1203 addRegisterClass(MVT::v4f64, &X86::VR256RegClass);
1205 setOperationAction(ISD::LOAD, MVT::v8f32, Legal);
1206 setOperationAction(ISD::LOAD, MVT::v4f64, Legal);
1207 setOperationAction(ISD::LOAD, MVT::v4i64, Legal);
1209 setOperationAction(ISD::FADD, MVT::v8f32, Legal);
1210 setOperationAction(ISD::FSUB, MVT::v8f32, Legal);
1211 setOperationAction(ISD::FMUL, MVT::v8f32, Legal);
1212 setOperationAction(ISD::FDIV, MVT::v8f32, Legal);
1213 setOperationAction(ISD::FSQRT, MVT::v8f32, Legal);
1214 setOperationAction(ISD::FFLOOR, MVT::v8f32, Legal);
1215 setOperationAction(ISD::FCEIL, MVT::v8f32, Legal);
1216 setOperationAction(ISD::FTRUNC, MVT::v8f32, Legal);
1217 setOperationAction(ISD::FRINT, MVT::v8f32, Legal);
1218 setOperationAction(ISD::FNEARBYINT, MVT::v8f32, Legal);
1219 setOperationAction(ISD::FNEG, MVT::v8f32, Custom);
1220 setOperationAction(ISD::FABS, MVT::v8f32, Custom);
1222 setOperationAction(ISD::FADD, MVT::v4f64, Legal);
1223 setOperationAction(ISD::FSUB, MVT::v4f64, Legal);
1224 setOperationAction(ISD::FMUL, MVT::v4f64, Legal);
1225 setOperationAction(ISD::FDIV, MVT::v4f64, Legal);
1226 setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);
1227 setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal);
1228 setOperationAction(ISD::FCEIL, MVT::v4f64, Legal);
1229 setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal);
1230 setOperationAction(ISD::FRINT, MVT::v4f64, Legal);
1231 setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Legal);
1232 setOperationAction(ISD::FNEG, MVT::v4f64, Custom);
1233 setOperationAction(ISD::FABS, MVT::v4f64, Custom);
1235 // (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
1236 // even though v8i16 is a legal type.
1237 setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Promote);
1238 setOperationAction(ISD::FP_TO_UINT, MVT::v8i16, Promote);
1239 setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
1241 setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Promote);
1242 setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
1243 setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal);
1245 setOperationAction(ISD::UINT_TO_FP, MVT::v8i8, Custom);
1246 setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
1248 for (MVT VT : MVT::fp_vector_valuetypes())
1249 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4f32, Legal);
1251 setOperationAction(ISD::SRL, MVT::v16i16, Custom);
1252 setOperationAction(ISD::SRL, MVT::v32i8, Custom);
1254 setOperationAction(ISD::SHL, MVT::v16i16, Custom);
1255 setOperationAction(ISD::SHL, MVT::v32i8, Custom);
1257 setOperationAction(ISD::SRA, MVT::v16i16, Custom);
1258 setOperationAction(ISD::SRA, MVT::v32i8, Custom);
1260 setOperationAction(ISD::SETCC, MVT::v32i8, Custom);
1261 setOperationAction(ISD::SETCC, MVT::v16i16, Custom);
1262 setOperationAction(ISD::SETCC, MVT::v8i32, Custom);
1263 setOperationAction(ISD::SETCC, MVT::v4i64, Custom);
1265 setOperationAction(ISD::SELECT, MVT::v4f64, Custom);
1266 setOperationAction(ISD::SELECT, MVT::v4i64, Custom);
1267 setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
1269 setOperationAction(ISD::VSELECT, MVT::v4f64, Custom);
1270 setOperationAction(ISD::VSELECT, MVT::v4i64, Custom);
1271 setOperationAction(ISD::VSELECT, MVT::v8i32, Custom);
1272 setOperationAction(ISD::VSELECT, MVT::v8f32, Custom);
1274 setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
1275 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom);
1276 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom);
1277 setOperationAction(ISD::ZERO_EXTEND, MVT::v4i64, Custom);
1278 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom);
1279 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i16, Custom);
1280 setOperationAction(ISD::ANY_EXTEND, MVT::v4i64, Custom);
1281 setOperationAction(ISD::ANY_EXTEND, MVT::v8i32, Custom);
1282 setOperationAction(ISD::ANY_EXTEND, MVT::v16i16, Custom);
1283 setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
1284 setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
1285 setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
1287 if (Subtarget->hasFMA() || Subtarget->hasFMA4()) {
1288 setOperationAction(ISD::FMA, MVT::v8f32, Legal);
1289 setOperationAction(ISD::FMA, MVT::v4f64, Legal);
1290 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
1291 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
1292 setOperationAction(ISD::FMA, MVT::f32, Legal);
1293 setOperationAction(ISD::FMA, MVT::f64, Legal);
1294 }
1296 if (Subtarget->hasInt256()) {
1297 setOperationAction(ISD::ADD, MVT::v4i64, Legal);
1298 setOperationAction(ISD::ADD, MVT::v8i32, Legal);
1299 setOperationAction(ISD::ADD, MVT::v16i16, Legal);
1300 setOperationAction(ISD::ADD, MVT::v32i8, Legal);
1302 setOperationAction(ISD::SUB, MVT::v4i64, Legal);
1303 setOperationAction(ISD::SUB, MVT::v8i32, Legal);
1304 setOperationAction(ISD::SUB, MVT::v16i16, Legal);
1305 setOperationAction(ISD::SUB, MVT::v32i8, Legal);
1307 setOperationAction(ISD::MUL, MVT::v4i64, Custom);
1308 setOperationAction(ISD::MUL, MVT::v8i32, Legal);
1309 setOperationAction(ISD::MUL, MVT::v16i16, Legal);
1310 // Don't lower v32i8 because there is no 128-bit byte mul
1312 setOperationAction(ISD::UMUL_LOHI, MVT::v8i32, Custom);
1313 setOperationAction(ISD::SMUL_LOHI, MVT::v8i32, Custom);
1314 setOperationAction(ISD::MULHU, MVT::v16i16, Legal);
1315 setOperationAction(ISD::MULHS, MVT::v16i16, Legal);
1317 setOperationAction(ISD::VSELECT, MVT::v16i16, Custom);
1318 setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
1320 // The custom lowering for UINT_TO_FP for v8i32 becomes interesting
1321 // when we have a 256bit-wide blend with immediate.
1322 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom);
1324 // Only provide customized ctpop vector bit twiddling for vector types we
1325 // know to perform better than using the popcnt instructions on each
1326 // vector element. If popcnt isn't supported, always provide the custom
1327 // version.
1328 if (!Subtarget->hasPOPCNT())
1329 setOperationAction(ISD::CTPOP, MVT::v4i64, Custom);
1331 // Custom CTPOP always performs better on natively supported v8i32
1332 setOperationAction(ISD::CTPOP, MVT::v8i32, Custom);
1334 // AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X
1335 setLoadExtAction(ISD::SEXTLOAD, MVT::v16i16, MVT::v16i8, Legal);
1336 setLoadExtAction(ISD::SEXTLOAD, MVT::v8i32, MVT::v8i8, Legal);
1337 setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64, MVT::v4i8, Legal);
1338 setLoadExtAction(ISD::SEXTLOAD, MVT::v8i32, MVT::v8i16, Legal);
1339 setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64, MVT::v4i16, Legal);
1340 setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64, MVT::v4i32, Legal);
1342 setLoadExtAction(ISD::ZEXTLOAD, MVT::v16i16, MVT::v16i8, Legal);
1343 setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i32, MVT::v8i8, Legal);
1344 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i64, MVT::v4i8, Legal);
1345 setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i32, MVT::v8i16, Legal);
1346 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i64, MVT::v4i16, Legal);
1347 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i64, MVT::v4i32, Legal);
1348 } else {
1349 setOperationAction(ISD::ADD, MVT::v4i64, Custom);
1350 setOperationAction(ISD::ADD, MVT::v8i32, Custom);
1351 setOperationAction(ISD::ADD, MVT::v16i16, Custom);
1352 setOperationAction(ISD::ADD, MVT::v32i8, Custom);
1354 setOperationAction(ISD::SUB, MVT::v4i64, Custom);
1355 setOperationAction(ISD::SUB, MVT::v8i32, Custom);
1356 setOperationAction(ISD::SUB, MVT::v16i16, Custom);
1357 setOperationAction(ISD::SUB, MVT::v32i8, Custom);
1359 setOperationAction(ISD::MUL, MVT::v4i64, Custom);
1360 setOperationAction(ISD::MUL, MVT::v8i32, Custom);
1361 setOperationAction(ISD::MUL, MVT::v16i16, Custom);
1362 // Don't lower v32i8 because there is no 128-bit byte mul
1363 }
1365 // In the customized shift lowering, the legal cases in AVX2 will be
1366 // recognized.
1367 setOperationAction(ISD::SRL, MVT::v4i64, Custom);
1368 setOperationAction(ISD::SRL, MVT::v8i32, Custom);
1370 setOperationAction(ISD::SHL, MVT::v4i64, Custom);
1371 setOperationAction(ISD::SHL, MVT::v8i32, Custom);
1373 setOperationAction(ISD::SRA, MVT::v8i32, Custom);
1375 // Custom lower several nodes for 256-bit types.
1376 for (MVT VT : MVT::vector_valuetypes()) {
1377 if (VT.getScalarSizeInBits() >= 32) {
1378 setOperationAction(ISD::MLOAD, VT, Legal);
1379 setOperationAction(ISD::MSTORE, VT, Legal);
1380 }
1381 // Extract subvector is special because the value type
1382 // (result) is 128-bit but the source is 256-bit wide.
1383 if (VT.is128BitVector()) {
1384 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1385 }
1386 // Do not attempt to custom lower other non-256-bit vectors
1387 if (!VT.is256BitVector())
1388 continue;
1390 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1391 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1392 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1393 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1394 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1395 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1396 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1397 }
1399 // Promote v32i8, v16i16, v8i32 select, and, or, xor to v4i64.
1400 for (int i = MVT::v32i8; i != MVT::v4i64; ++i) {
1401 MVT VT = (MVT::SimpleValueType)i;
1403 // Do not attempt to promote non-256-bit vectors
1404 if (!VT.is256BitVector())
1405 continue;
1407 setOperationAction(ISD::AND, VT, Promote);
1408 AddPromotedToType (ISD::AND, VT, MVT::v4i64);
1409 setOperationAction(ISD::OR, VT, Promote);
1410 AddPromotedToType (ISD::OR, VT, MVT::v4i64);
1411 setOperationAction(ISD::XOR, VT, Promote);
1412 AddPromotedToType (ISD::XOR, VT, MVT::v4i64);
1413 setOperationAction(ISD::LOAD, VT, Promote);
1414 AddPromotedToType (ISD::LOAD, VT, MVT::v4i64);
1415 setOperationAction(ISD::SELECT, VT, Promote);
1416 AddPromotedToType (ISD::SELECT, VT, MVT::v4i64);
1417 }
1418 }
1420 if (!TM.Options.UseSoftFloat && Subtarget->hasAVX512()) {
1421 addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
1422 addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
1423 addRegisterClass(MVT::v8i64, &X86::VR512RegClass);
1424 addRegisterClass(MVT::v8f64, &X86::VR512RegClass);
1426 addRegisterClass(MVT::i1, &X86::VK1RegClass);
1427 addRegisterClass(MVT::v8i1, &X86::VK8RegClass);
1428 addRegisterClass(MVT::v16i1, &X86::VK16RegClass);
1430 for (MVT VT : MVT::fp_vector_valuetypes())
1431 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8f32, Legal);
1433 setOperationAction(ISD::BR_CC, MVT::i1, Expand);
1434 setOperationAction(ISD::SETCC, MVT::i1, Custom);
1435 setOperationAction(ISD::XOR, MVT::i1, Legal);
1436 setOperationAction(ISD::OR, MVT::i1, Legal);
1437 setOperationAction(ISD::AND, MVT::i1, Legal);
1438 setOperationAction(ISD::LOAD, MVT::v16f32, Legal);
1439 setOperationAction(ISD::LOAD, MVT::v8f64, Legal);
1440 setOperationAction(ISD::LOAD, MVT::v8i64, Legal);
1441 setOperationAction(ISD::LOAD, MVT::v16i32, Legal);
1442 setOperationAction(ISD::LOAD, MVT::v16i1, Legal);
1444 setOperationAction(ISD::FADD, MVT::v16f32, Legal);
1445 setOperationAction(ISD::FSUB, MVT::v16f32, Legal);
1446 setOperationAction(ISD::FMUL, MVT::v16f32, Legal);
1447 setOperationAction(ISD::FDIV, MVT::v16f32, Legal);
1448 setOperationAction(ISD::FSQRT, MVT::v16f32, Legal);
1449 setOperationAction(ISD::FNEG, MVT::v16f32, Custom);
1451 setOperationAction(ISD::FADD, MVT::v8f64, Legal);
1452 setOperationAction(ISD::FSUB, MVT::v8f64, Legal);
1453 setOperationAction(ISD::FMUL, MVT::v8f64, Legal);
1454 setOperationAction(ISD::FDIV, MVT::v8f64, Legal);
1455 setOperationAction(ISD::FSQRT, MVT::v8f64, Legal);
1456 setOperationAction(ISD::FNEG, MVT::v8f64, Custom);
1457 setOperationAction(ISD::FMA, MVT::v8f64, Legal);
1458 setOperationAction(ISD::FMA, MVT::v16f32, Legal);
1460 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
1461 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
1462 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
1463 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
1464 if (Subtarget->is64Bit()) {
1465 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Legal);
1466 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Legal);
1467 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Legal);
1468 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Legal);
1469 }
1470 setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);
1471 setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
1472 setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
1473 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
1474 setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);
1475 setOperationAction(ISD::SINT_TO_FP, MVT::v8i1, Custom);
1476 setOperationAction(ISD::SINT_TO_FP, MVT::v16i1, Custom);
1477 setOperationAction(ISD::SINT_TO_FP, MVT::v16i8, Promote);
1478 setOperationAction(ISD::SINT_TO_FP, MVT::v16i16, Promote);
1479 setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal);
1480 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal);
1481 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
1482 setOperationAction(ISD::FP_ROUND, MVT::v8f32, Legal);
1483 setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Legal);
1485 setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
1486 setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
1487 setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);
1488 setOperationAction(ISD::TRUNCATE, MVT::v8i1, Custom);
1489 setOperationAction(ISD::TRUNCATE, MVT::v16i1, Custom);
1490 setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom);
1491 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
1492 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
1493 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
1494 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
1495 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i8, Custom);
1496 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i16, Custom);
1497 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom);
1499 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f64, Custom);
1500 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i64, Custom);
1501 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f32, Custom);
1502 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i32, Custom);
1503 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i1, Custom);
1504 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i1, Legal);
1506 setOperationAction(ISD::SETCC, MVT::v16i1, Custom);
1507 setOperationAction(ISD::SETCC, MVT::v8i1, Custom);
1509 setOperationAction(ISD::MUL, MVT::v8i64, Custom);
1511 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i1, Custom);
1512 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i1, Custom);
1513 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i1, Custom);
1514 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i1, Custom);
1515 setOperationAction(ISD::BUILD_VECTOR, MVT::v8i1, Custom);
1516 setOperationAction(ISD::BUILD_VECTOR, MVT::v16i1, Custom);
1517 setOperationAction(ISD::SELECT, MVT::v8f64, Custom);
1518 setOperationAction(ISD::SELECT, MVT::v8i64, Custom);
1519 setOperationAction(ISD::SELECT, MVT::v16f32, Custom);
1521 setOperationAction(ISD::ADD, MVT::v8i64, Legal);
1522 setOperationAction(ISD::ADD, MVT::v16i32, Legal);
1524 setOperationAction(ISD::SUB, MVT::v8i64, Legal);
1525 setOperationAction(ISD::SUB, MVT::v16i32, Legal);
1527 setOperationAction(ISD::MUL, MVT::v16i32, Legal);
1529 setOperationAction(ISD::SRL, MVT::v8i64, Custom);
1530 setOperationAction(ISD::SRL, MVT::v16i32, Custom);
1532 setOperationAction(ISD::SHL, MVT::v8i64, Custom);
1533 setOperationAction(ISD::SHL, MVT::v16i32, Custom);
1535 setOperationAction(ISD::SRA, MVT::v8i64, Custom);
1536 setOperationAction(ISD::SRA, MVT::v16i32, Custom);
1538 setOperationAction(ISD::AND, MVT::v8i64, Legal);
1539 setOperationAction(ISD::OR, MVT::v8i64, Legal);
1540 setOperationAction(ISD::XOR, MVT::v8i64, Legal);
1541 setOperationAction(ISD::AND, MVT::v16i32, Legal);
1542 setOperationAction(ISD::OR, MVT::v16i32, Legal);
1543 setOperationAction(ISD::XOR, MVT::v16i32, Legal);
1545 if (Subtarget->hasCDI()) {
1546 setOperationAction(ISD::CTLZ, MVT::v8i64, Legal);
1547 setOperationAction(ISD::CTLZ, MVT::v16i32, Legal);
1548 }
1550 // Custom lower several nodes.
1551 for (MVT VT : MVT::vector_valuetypes()) {
1552 unsigned EltSize = VT.getVectorElementType().getSizeInBits();
1553 // Extract subvector is special because the value type
1554 // (result) is 256/128-bit but the source is 512-bit wide.
1555 if (VT.is128BitVector() || VT.is256BitVector()) {
1556 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1557 }
1558 if (VT.getVectorElementType() == MVT::i1)
1559 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1561 // Do not attempt to custom lower other non-512-bit vectors
1562 if (!VT.is512BitVector())
1563 continue;
1565 if ( EltSize >= 32) {
1566 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1567 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1568 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1569 setOperationAction(ISD::VSELECT, VT, Legal);
1570 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1571 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1572 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1573 setOperationAction(ISD::MLOAD, VT, Legal);
1574 setOperationAction(ISD::MSTORE, VT, Legal);
1575 }
1576 }
1577 for (int i = MVT::v32i8; i != MVT::v8i64; ++i) {
1578 MVT VT = (MVT::SimpleValueType)i;
1580 // Do not attempt to promote non-512-bit vectors.
1581 if (!VT.is512BitVector())
1582 continue;
1584 setOperationAction(ISD::SELECT, VT, Promote);
1585 AddPromotedToType (ISD::SELECT, VT, MVT::v8i64);
1586 }
1587 }// has AVX-512
1589 if (!TM.Options.UseSoftFloat && Subtarget->hasBWI()) {
1590 addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
1591 addRegisterClass(MVT::v64i8, &X86::VR512RegClass);
1593 addRegisterClass(MVT::v32i1, &X86::VK32RegClass);
1594 addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
1596 setOperationAction(ISD::LOAD, MVT::v32i16, Legal);
1597 setOperationAction(ISD::LOAD, MVT::v64i8, Legal);
1598 setOperationAction(ISD::SETCC, MVT::v32i1, Custom);
1599 setOperationAction(ISD::SETCC, MVT::v64i1, Custom);
1600 setOperationAction(ISD::ADD, MVT::v32i16, Legal);
1601 setOperationAction(ISD::ADD, MVT::v64i8, Legal);
1602 setOperationAction(ISD::SUB, MVT::v32i16, Legal);
1603 setOperationAction(ISD::SUB, MVT::v64i8, Legal);
1604 setOperationAction(ISD::MUL, MVT::v32i16, Legal);
1606 for (int i = MVT::v32i8; i != MVT::v8i64; ++i) {
1607 const MVT VT = (MVT::SimpleValueType)i;
1609 const unsigned EltSize = VT.getVectorElementType().getSizeInBits();
1611 // Do not attempt to promote non-512-bit vectors.
1612 if (!VT.is512BitVector())
1613 continue;
1615 if (EltSize < 32) {
1616 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1617 setOperationAction(ISD::VSELECT, VT, Legal);
1618 }
1619 }
1620 }
1622 if (!TM.Options.UseSoftFloat && Subtarget->hasVLX()) {
1623 addRegisterClass(MVT::v4i1, &X86::VK4RegClass);
1624 addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
1626 setOperationAction(ISD::SETCC, MVT::v4i1, Custom);
1627 setOperationAction(ISD::SETCC, MVT::v2i1, Custom);
1628 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i1, Legal);
1630 setOperationAction(ISD::AND, MVT::v8i32, Legal);
1631 setOperationAction(ISD::OR, MVT::v8i32, Legal);
1632 setOperationAction(ISD::XOR, MVT::v8i32, Legal);
1633 setOperationAction(ISD::AND, MVT::v4i32, Legal);
1634 setOperationAction(ISD::OR, MVT::v4i32, Legal);
1635 setOperationAction(ISD::XOR, MVT::v4i32, Legal);
1636 }
1638 // SIGN_EXTEND_INREGs are evaluated by the extend type. Handle the expansion
1639 // of this type with custom code.
1640 for (MVT VT : MVT::vector_valuetypes())
1641 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
1643 // We want to custom lower some of our intrinsics.
1644 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
1645 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
1646 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
1647 if (!Subtarget->is64Bit())
1648 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
1650 // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
1651 // handle type legalization for these operations here.
1652 //
1653 // FIXME: We really should do custom legalization for addition and
1654 // subtraction on x86-32 once PR3203 is fixed. We really can't do much better
1655 // than generic legalization for 64-bit multiplication-with-overflow, though.
1656 for (unsigned i = 0, e = 3+Subtarget->is64Bit(); i != e; ++i) {
1657 // Add/Sub/Mul with overflow operations are custom lowered.
1658 MVT VT = IntVTs[i];
1659 setOperationAction(ISD::SADDO, VT, Custom);
1660 setOperationAction(ISD::UADDO, VT, Custom);
1661 setOperationAction(ISD::SSUBO, VT, Custom);
1662 setOperationAction(ISD::USUBO, VT, Custom);
1663 setOperationAction(ISD::SMULO, VT, Custom);
1664 setOperationAction(ISD::UMULO, VT, Custom);
1665 }
1668 if (!Subtarget->is64Bit()) {
1669 // These libcalls are not available in 32-bit.
1670 setLibcallName(RTLIB::SHL_I128, nullptr);
1671 setLibcallName(RTLIB::SRL_I128, nullptr);
1672 setLibcallName(RTLIB::SRA_I128, nullptr);
1673 }
1675 // Combine sin / cos into one node or libcall if possible.
1676 if (Subtarget->hasSinCos()) {
1677 setLibcallName(RTLIB::SINCOS_F32, "sincosf");
1678 setLibcallName(RTLIB::SINCOS_F64, "sincos");
1679 if (Subtarget->isTargetDarwin()) {
1680 // For MacOSX, we don't want the normal expansion of a libcall to sincos.
1681 // We want to issue a libcall to __sincos_stret to avoid memory traffic.
1682 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
1683 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
1684 }
1685 }
1687 if (Subtarget->isTargetWin64()) {
1688 setOperationAction(ISD::SDIV, MVT::i128, Custom);
1689 setOperationAction(ISD::UDIV, MVT::i128, Custom);
1690 setOperationAction(ISD::SREM, MVT::i128, Custom);
1691 setOperationAction(ISD::UREM, MVT::i128, Custom);
1692 setOperationAction(ISD::SDIVREM, MVT::i128, Custom);
1693 setOperationAction(ISD::UDIVREM, MVT::i128, Custom);
1694 }
1696 // We have target-specific dag combine patterns for the following nodes:
1697 setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
1698 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
1699 setTargetDAGCombine(ISD::VSELECT);
1700 setTargetDAGCombine(ISD::SELECT);
1701 setTargetDAGCombine(ISD::SHL);
1702 setTargetDAGCombine(ISD::SRA);
1703 setTargetDAGCombine(ISD::SRL);
1704 setTargetDAGCombine(ISD::OR);
1705 setTargetDAGCombine(ISD::AND);
1706 setTargetDAGCombine(ISD::ADD);
1707 setTargetDAGCombine(ISD::FADD);
1708 setTargetDAGCombine(ISD::FSUB);
1709 setTargetDAGCombine(ISD::FMA);
1710 setTargetDAGCombine(ISD::SUB);
1711 setTargetDAGCombine(ISD::LOAD);
1712 setTargetDAGCombine(ISD::MLOAD);
1713 setTargetDAGCombine(ISD::STORE);
1714 setTargetDAGCombine(ISD::MSTORE);
1715 setTargetDAGCombine(ISD::ZERO_EXTEND);
1716 setTargetDAGCombine(ISD::ANY_EXTEND);
1717 setTargetDAGCombine(ISD::SIGN_EXTEND);
1718 setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
1719 setTargetDAGCombine(ISD::TRUNCATE);
1720 setTargetDAGCombine(ISD::SINT_TO_FP);
1721 setTargetDAGCombine(ISD::SETCC);
1722 setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
1723 setTargetDAGCombine(ISD::BUILD_VECTOR);
1724 if (Subtarget->is64Bit())
1725 setTargetDAGCombine(ISD::MUL);
1726 setTargetDAGCombine(ISD::XOR);
1728 computeRegisterProperties();
1730 // On Darwin, -Os means optimize for size without hurting performance,
1731 // do not reduce the limit.
1732 MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
1733 MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 16 : 8;
1734 MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
1735 MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
1736 MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
1737 MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
1738 setPrefLoopAlignment(4); // 2^4 bytes.
1740 // Predictable cmov don't hurt on atom because it's in-order.
1741 PredictableSelectIsExpensive = !Subtarget->isAtom();
1742 EnableExtLdPromotion = true;
1743 setPrefFunctionAlignment(4); // 2^4 bytes.
1745 verifyIntrinsicTables();
1746 }
1748 // This has so far only been implemented for 64-bit MachO.
1749 bool X86TargetLowering::useLoadStackGuardNode() const {
1750 return Subtarget->isTargetMachO() && Subtarget->is64Bit();
1751 }
1753 TargetLoweringBase::LegalizeTypeAction
1754 X86TargetLowering::getPreferredVectorAction(EVT VT) const {
1755 if (ExperimentalVectorWideningLegalization &&
1756 VT.getVectorNumElements() != 1 &&
1757 VT.getVectorElementType().getSimpleVT() != MVT::i1)
1758 return TypeWidenVector;
1760 return TargetLoweringBase::getPreferredVectorAction(VT);
1761 }
1763 EVT X86TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
1764 if (!VT.isVector())
1765 return Subtarget->hasAVX512() ? MVT::i1: MVT::i8;
1767 const unsigned NumElts = VT.getVectorNumElements();
1768 const EVT EltVT = VT.getVectorElementType();
1769 if (VT.is512BitVector()) {
1770 if (Subtarget->hasAVX512())
1771 if (EltVT == MVT::i32 || EltVT == MVT::i64 ||
1772 EltVT == MVT::f32 || EltVT == MVT::f64)
1773 switch(NumElts) {
1774 case 8: return MVT::v8i1;
1775 case 16: return MVT::v16i1;
1776 }
1777 if (Subtarget->hasBWI())
1778 if (EltVT == MVT::i8 || EltVT == MVT::i16)
1779 switch(NumElts) {
1780 case 32: return MVT::v32i1;
1781 case 64: return MVT::v64i1;
1782 }
1783 }
1785 if (VT.is256BitVector() || VT.is128BitVector()) {
1786 if (Subtarget->hasVLX())
1787 if (EltVT == MVT::i32 || EltVT == MVT::i64 ||
1788 EltVT == MVT::f32 || EltVT == MVT::f64)
1789 switch(NumElts) {
1790 case 2: return MVT::v2i1;
1791 case 4: return MVT::v4i1;
1792 case 8: return MVT::v8i1;
1793 }
1794 if (Subtarget->hasBWI() && Subtarget->hasVLX())
1795 if (EltVT == MVT::i8 || EltVT == MVT::i16)
1796 switch(NumElts) {
1797 case 8: return MVT::v8i1;
1798 case 16: return MVT::v16i1;
1799 case 32: return MVT::v32i1;
1800 }
1801 }
1803 return VT.changeVectorElementTypeToInteger();
1804 }
1806 /// Helper for getByValTypeAlignment to determine
1807 /// the desired ByVal argument alignment.
1808 static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) {
1809 if (MaxAlign == 16)
1810 return;
1811 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1812 if (VTy->getBitWidth() == 128)
1813 MaxAlign = 16;
1814 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1815 unsigned EltAlign = 0;
1816 getMaxByValAlign(ATy->getElementType(), EltAlign);
1817 if (EltAlign > MaxAlign)
1818 MaxAlign = EltAlign;
1819 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1820 for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
1821 unsigned EltAlign = 0;
1822 getMaxByValAlign(STy->getElementType(i), EltAlign);
1823 if (EltAlign > MaxAlign)
1824 MaxAlign = EltAlign;
1825 if (MaxAlign == 16)
1826 break;
1827 }
1828 }
1829 }
1831 /// Return the desired alignment for ByVal aggregate
1832 /// function arguments in the caller parameter area. For X86, aggregates
1833 /// that contain SSE vectors are placed at 16-byte boundaries while the rest
1834 /// are at 4-byte boundaries.
1835 unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty) const {
1836 if (Subtarget->is64Bit()) {
1837 // Max of 8 and alignment of type.
1838 unsigned TyAlign = TD->getABITypeAlignment(Ty);
1839 if (TyAlign > 8)
1840 return TyAlign;
1841 return 8;
1842 }
1844 unsigned Align = 4;
1845 if (Subtarget->hasSSE1())
1846 getMaxByValAlign(Ty, Align);
1847 return Align;
1848 }
1850 /// Returns the target specific optimal type for load
1851 /// and store operations as a result of memset, memcpy, and memmove
1852 /// lowering. If DstAlign is zero that means it's safe to destination
1853 /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
1854 /// means there isn't a need to check it against alignment requirement,
1855 /// probably because the source does not need to be loaded. If 'IsMemset' is
1856 /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
1857 /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
1858 /// source is constant so it does not need to be loaded.
1859 /// It returns EVT::Other if the type should be determined using generic
1860 /// target-independent logic.
1861 EVT
1862 X86TargetLowering::getOptimalMemOpType(uint64_t Size,
1863 unsigned DstAlign, unsigned SrcAlign,
1864 bool IsMemset, bool ZeroMemset,
1865 bool MemcpyStrSrc,
1866 MachineFunction &MF) const {
1867 const Function *F = MF.getFunction();
1868 if ((!IsMemset || ZeroMemset) &&
1869 !F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
1870 Attribute::NoImplicitFloat)) {
1871 if (Size >= 16 &&
1872 (Subtarget->isUnalignedMemAccessFast() ||
1873 ((DstAlign == 0 || DstAlign >= 16) &&
1874 (SrcAlign == 0 || SrcAlign >= 16)))) {
1875 if (Size >= 32) {
1876 if (Subtarget->hasInt256())
1877 return MVT::v8i32;
1878 if (Subtarget->hasFp256())
1879 return MVT::v8f32;
1880 }
1881 if (Subtarget->hasSSE2())
1882 return MVT::v4i32;
1883 if (Subtarget->hasSSE1())
1884 return MVT::v4f32;
1885 } else if (!MemcpyStrSrc && Size >= 8 &&
1886 !Subtarget->is64Bit() &&
1887 Subtarget->hasSSE2()) {
1888 // Do not use f64 to lower memcpy if source is string constant. It's
1889 // better to use i32 to avoid the loads.
1890 return MVT::f64;
1891 }
1892 }
1893 if (Subtarget->is64Bit() && Size >= 8)
1894 return MVT::i64;
1895 return MVT::i32;
1896 }
1898 bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
1899 if (VT == MVT::f32)
1900 return X86ScalarSSEf32;
1901 else if (VT == MVT::f64)
1902 return X86ScalarSSEf64;
1903 return true;
1904 }
1906 bool
1907 X86TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
1908 unsigned,
1909 unsigned,
1910 bool *Fast) const {
1911 if (Fast)
1912 *Fast = Subtarget->isUnalignedMemAccessFast();
1913 return true;
1914 }
1916 /// Return the entry encoding for a jump table in the
1917 /// current function. The returned value is a member of the
1918 /// MachineJumpTableInfo::JTEntryKind enum.
1919 unsigned X86TargetLowering::getJumpTableEncoding() const {
1920 // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
1921 // symbol.
1922 if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
1923 Subtarget->isPICStyleGOT())
1924 return MachineJumpTableInfo::EK_Custom32;
1926 // Otherwise, use the normal jump table encoding heuristics.
1927 return TargetLowering::getJumpTableEncoding();
1928 }
1930 const MCExpr *
1931 X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
1932 const MachineBasicBlock *MBB,
1933 unsigned uid,MCContext &Ctx) const{
1934 assert(MBB->getParent()->getTarget().getRelocationModel() == Reloc::PIC_ &&
1935 Subtarget->isPICStyleGOT());
1936 // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
1937 // entries.
1938 return MCSymbolRefExpr::Create(MBB->getSymbol(),
1939 MCSymbolRefExpr::VK_GOTOFF, Ctx);
1940 }
1942 /// Returns relocation base for the given PIC jumptable.
1943 SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
1944 SelectionDAG &DAG) const {
1945 if (!Subtarget->is64Bit())
1946 // This doesn't have SDLoc associated with it, but is not really the
1947 // same as a Register.
1948 return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), getPointerTy());
1949 return Table;
1950 }
1952 /// This returns the relocation base for the given PIC jumptable,
1953 /// the same as getPICJumpTableRelocBase, but as an MCExpr.
1954 const MCExpr *X86TargetLowering::
1955 getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
1956 MCContext &Ctx) const {
1957 // X86-64 uses RIP relative addressing based on the jump table label.
1958 if (Subtarget->isPICStyleRIPRel())
1959 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
1961 // Otherwise, the reference is relative to the PIC base.
1962 return MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), Ctx);
1963 }
1965 // FIXME: Why this routine is here? Move to RegInfo!
1966 std::pair<const TargetRegisterClass*, uint8_t>
1967 X86TargetLowering::findRepresentativeClass(MVT VT) const{
1968 const TargetRegisterClass *RRC = nullptr;
1969 uint8_t Cost = 1;
1970 switch (VT.SimpleTy) {
1971 default:
1972 return TargetLowering::findRepresentativeClass(VT);
1973 case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
1974 RRC = Subtarget->is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
1975 break;
1976 case MVT::x86mmx:
1977 RRC = &X86::VR64RegClass;
1978 break;
1979 case MVT::f32: case MVT::f64:
1980 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
1981 case MVT::v4f32: case MVT::v2f64:
1982 case MVT::v32i8: case MVT::v8i32: case MVT::v4i64: case MVT::v8f32:
1983 case MVT::v4f64:
1984 RRC = &X86::VR128RegClass;
1985 break;
1986 }
1987 return std::make_pair(RRC, Cost);
1988 }
1990 bool X86TargetLowering::getStackCookieLocation(unsigned &AddressSpace,
1991 unsigned &Offset) const {
1992 if (!Subtarget->isTargetLinux())
1993 return false;
1995 if (Subtarget->is64Bit()) {
1996 // %fs:0x28, unless we're using a Kernel code model, in which case it's %gs:
1997 Offset = 0x28;
1998 if (getTargetMachine().getCodeModel() == CodeModel::Kernel)
1999 AddressSpace = 256;
2000 else
2001 AddressSpace = 257;
2002 } else {
2003 // %gs:0x14 on i386
2004 Offset = 0x14;
2005 AddressSpace = 256;
2006 }
2007 return true;
2008 }
2010 bool X86TargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
2011 unsigned DestAS) const {
2012 assert(SrcAS != DestAS && "Expected different address spaces!");
2014 return SrcAS < 256 && DestAS < 256;
2015 }
2017 //===----------------------------------------------------------------------===//
2018 // Return Value Calling Convention Implementation
2019 //===----------------------------------------------------------------------===//
2021 #include "X86GenCallingConv.inc"
2023 bool
2024 X86TargetLowering::CanLowerReturn(CallingConv::ID CallConv,
2025 MachineFunction &MF, bool isVarArg,
2026 const SmallVectorImpl<ISD::OutputArg> &Outs,
2027 LLVMContext &Context) const {
2028 SmallVector<CCValAssign, 16> RVLocs;
2029 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
2030 return CCInfo.CheckReturn(Outs, RetCC_X86);
2031 }
2033 const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
2034 static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
2035 return ScratchRegs;
2036 }
2038 SDValue
2039 X86TargetLowering::LowerReturn(SDValue Chain,
2040 CallingConv::ID CallConv, bool isVarArg,
2041 const SmallVectorImpl<ISD::OutputArg> &Outs,
2042 const SmallVectorImpl<SDValue> &OutVals,
2043 SDLoc dl, SelectionDAG &DAG) const {
2044 MachineFunction &MF = DAG.getMachineFunction();
2045 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
2047 SmallVector<CCValAssign, 16> RVLocs;
2048 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
2049 CCInfo.AnalyzeReturn(Outs, RetCC_X86);
2051 SDValue Flag;
2052 SmallVector<SDValue, 6> RetOps;
2053 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
2054 // Operand #1 = Bytes To Pop
2055 RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(),
2056 MVT::i16));
2058 // Copy the result values into the output registers.
2059 for (unsigned i = 0; i != RVLocs.size(); ++i) {
2060 CCValAssign &VA = RVLocs[i];
2061 assert(VA.isRegLoc() && "Can only return in registers!");
2062 SDValue ValToCopy = OutVals[i];
2063 EVT ValVT = ValToCopy.getValueType();
2065 // Promote values to the appropriate types.
2066 if (VA.getLocInfo() == CCValAssign::SExt)
2067 ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
2068 else if (VA.getLocInfo() == CCValAssign::ZExt)
2069 ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
2070 else if (VA.getLocInfo() == CCValAssign::AExt)
2071 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
2072 else if (VA.getLocInfo() == CCValAssign::BCvt)
2073 ValToCopy = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), ValToCopy);
2075 assert(VA.getLocInfo() != CCValAssign::FPExt &&
2076 "Unexpected FP-extend for return value.");
2078 // If this is x86-64, and we disabled SSE, we can't return FP values,
2079 // or SSE or MMX vectors.
2080 if ((ValVT == MVT::f32 || ValVT == MVT::f64 ||
2081 VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) &&
2082 (Subtarget->is64Bit() && !Subtarget->hasSSE1())) {
2083 report_fatal_error("SSE register return with SSE disabled");
2084 }
2085 // Likewise we can't return F64 values with SSE1 only. gcc does so, but
2086 // llvm-gcc has never done it right and no one has noticed, so this
2087 // should be OK for now.
2088 if (ValVT == MVT::f64 &&
2089 (Subtarget->is64Bit() && !Subtarget->hasSSE2()))
2090 report_fatal_error("SSE2 register return with SSE2 disabled");
2092 // Returns in ST0/ST1 are handled specially: these are pushed as operands to
2093 // the RET instruction and handled by the FP Stackifier.
2094 if (VA.getLocReg() == X86::FP0 ||
2095 VA.getLocReg() == X86::FP1) {
2096 // If this is a copy from an xmm register to ST(0), use an FPExtend to
2097 // change the value to the FP stack register class.
2098 if (isScalarFPTypeInSSEReg(VA.getValVT()))
2099 ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
2100 RetOps.push_back(ValToCopy);
2101 // Don't emit a copytoreg.
2102 continue;
2103 }
2105 // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
2106 // which is returned in RAX / RDX.
2107 if (Subtarget->is64Bit()) {
2108 if (ValVT == MVT::x86mmx) {
2109 if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
2110 ValToCopy = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ValToCopy);
2111 ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
2112 ValToCopy);
2113 // If we don't have SSE2 available, convert to v4f32 so the generated
2114 // register is legal.
2115 if (!Subtarget->hasSSE2())
2116 ValToCopy = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32,ValToCopy);
2117 }
2118 }
2119 }
2121 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ValToCopy, Flag);
2122 Flag = Chain.getValue(1);
2123 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2124 }
2126 // The x86-64 ABIs require that for returning structs by value we copy
2127 // the sret argument into %rax/%eax (depending on ABI) for the return.
2128 // Win32 requires us to put the sret argument to %eax as well.
2129 // We saved the argument into a virtual register in the entry block,
2130 // so now we copy the value out and into %rax/%eax.
2131 if (DAG.getMachineFunction().getFunction()->hasStructRetAttr() &&
2132 (Subtarget->is64Bit() || Subtarget->isTargetKnownWindowsMSVC())) {
2133 MachineFunction &MF = DAG.getMachineFunction();
2134 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
2135 unsigned Reg = FuncInfo->getSRetReturnReg();
2136 assert(Reg &&
2137 "SRetReturnReg should have been set in LowerFormalArguments().");
2138 SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
2140 unsigned RetValReg
2141 = (Subtarget->is64Bit() && !Subtarget->isTarget64BitILP32()) ?
2142 X86::RAX : X86::EAX;
2143 Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag);
2144 Flag = Chain.getValue(1);
2146 // RAX/EAX now acts like a return value.
2147 RetOps.push_back(DAG.getRegister(RetValReg, getPointerTy()));
2148 }
2150 RetOps[0] = Chain; // Update chain.
2152 // Add the flag if we have it.
2153 if (Flag.getNode())
2154 RetOps.push_back(Flag);
2156 return DAG.getNode(X86ISD::RET_FLAG, dl, MVT::Other, RetOps);
2157 }
2159 bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
2160 if (N->getNumValues() != 1)
2161 return false;
2162 if (!N->hasNUsesOfValue(1, 0))
2163 return false;
2165 SDValue TCChain = Chain;
2166 SDNode *Copy = *N->use_begin();
2167 if (Copy->getOpcode() == ISD::CopyToReg) {
2168 // If the copy has a glue operand, we conservatively assume it isn't safe to
2169 // perform a tail call.
2170 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2171 return false;
2172 TCChain = Copy->getOperand(0);
2173 } else if (Copy->getOpcode() != ISD::FP_EXTEND)
2174 return false;
2176 bool HasRet = false;
2177 for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
2178 UI != UE; ++UI) {
2179 if (UI->getOpcode() != X86ISD::RET_FLAG)
2180 return false;
2181 // If we are returning more than one value, we can definitely
2182 // not make a tail call see PR19530
2183 if (UI->getNumOperands() > 4)
2184 return false;
2185 if (UI->getNumOperands() == 4 &&
2186 UI->getOperand(UI->getNumOperands()-1).getValueType() != MVT::Glue)
2187 return false;
2188 HasRet = true;
2189 }
2191 if (!HasRet)
2192 return false;
2194 Chain = TCChain;
2195 return true;
2196 }
2198 EVT
2199 X86TargetLowering::getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
2200 ISD::NodeType ExtendKind) const {
2201 MVT ReturnMVT;
2202 // TODO: Is this also valid on 32-bit?
2203 if (Subtarget->is64Bit() && VT == MVT::i1 && ExtendKind == ISD::ZERO_EXTEND)
2204 ReturnMVT = MVT::i8;
2205 else
2206 ReturnMVT = MVT::i32;
2208 EVT MinVT = getRegisterType(Context, ReturnMVT);
2209 return VT.bitsLT(MinVT) ? MinVT : VT;
2210 }
2212 /// Lower the result values of a call into the
2213 /// appropriate copies out of appropriate physical registers.
2214 ///
2215 SDValue
2216 X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
2217 CallingConv::ID CallConv, bool isVarArg,
2218 const SmallVectorImpl<ISD::InputArg> &Ins,
2219 SDLoc dl, SelectionDAG &DAG,
2220 SmallVectorImpl<SDValue> &InVals) const {
2222 // Assign locations to each value returned by this call.
2223 SmallVector<CCValAssign, 16> RVLocs;
2224 bool Is64Bit = Subtarget->is64Bit();
2225 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2226 *DAG.getContext());
2227 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
2229 // Copy all of the result registers out of their specified physreg.
2230 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
2231 CCValAssign &VA = RVLocs[i];
2232 EVT CopyVT = VA.getValVT();
2234 // If this is x86-64, and we disabled SSE, we can't return FP values
2235 if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
2236 ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
2237 report_fatal_error("SSE register return with SSE disabled");
2238 }
2240 // If we prefer to use the value in xmm registers, copy it out as f80 and
2241 // use a truncate to move it from fp stack reg to xmm reg.
2242 if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
2243 isScalarFPTypeInSSEReg(VA.getValVT()))
2244 CopyVT = MVT::f80;
2246 Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
2247 CopyVT, InFlag).getValue(1);
2248 SDValue Val = Chain.getValue(0);
2250 if (CopyVT != VA.getValVT())
2251 Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
2252 // This truncation won't change the value.
2253 DAG.getIntPtrConstant(1));
2255 InFlag = Chain.getValue(2);
2256 InVals.push_back(Val);
2257 }
2259 return Chain;
2260 }
2262 //===----------------------------------------------------------------------===//
2263 // C & StdCall & Fast Calling Convention implementation
2264 //===----------------------------------------------------------------------===//
2265 // StdCall calling convention seems to be standard for many Windows' API
2266 // routines and around. It differs from C calling convention just a little:
2267 // callee should clean up the stack, not caller. Symbols should be also
2268 // decorated in some fancy way :) It doesn't support any vector arguments.
2269 // For info on fast calling convention see Fast Calling Convention (tail call)
2270 // implementation LowerX86_32FastCCCallTo.
2272 /// CallIsStructReturn - Determines whether a call uses struct return
2273 /// semantics.
2274 enum StructReturnType {
2275 NotStructReturn,
2276 RegStructReturn,
2277 StackStructReturn
2278 };
2279 static StructReturnType
2280 callIsStructReturn(const SmallVectorImpl<ISD::OutputArg> &Outs) {
2281 if (Outs.empty())
2282 return NotStructReturn;
2284 const ISD::ArgFlagsTy &Flags = Outs[0].Flags;
2285 if (!Flags.isSRet())
2286 return NotStructReturn;
2287 if (Flags.isInReg())
2288 return RegStructReturn;
2289 return StackStructReturn;
2290 }
2292 /// Determines whether a function uses struct return semantics.
2293 static StructReturnType
2294 argsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins) {
2295 if (Ins.empty())
2296 return NotStructReturn;
2298 const ISD::ArgFlagsTy &Flags = Ins[0].Flags;
2299 if (!Flags.isSRet())
2300 return NotStructReturn;
2301 if (Flags.isInReg())
2302 return RegStructReturn;
2303 return StackStructReturn;
2304 }
2306 /// Make a copy of an aggregate at address specified by "Src" to address
2307 /// "Dst" with size and alignment information specified by the specific
2308 /// parameter attribute. The copy will be passed as a byval function parameter.
2309 static SDValue
2310 CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
2311 ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
2312 SDLoc dl) {
2313 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
2315 return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
2316 /*isVolatile*/false, /*AlwaysInline=*/true,
2317 MachinePointerInfo(), MachinePointerInfo());
2318 }
2320 /// Return true if the calling convention is one that
2321 /// supports tail call optimization.
2322 static bool IsTailCallConvention(CallingConv::ID CC) {
2323 return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
2324 CC == CallingConv::HiPE);
2325 }
2327 /// \brief Return true if the calling convention is a C calling convention.
2328 static bool IsCCallConvention(CallingConv::ID CC) {
2329 return (CC == CallingConv::C || CC == CallingConv::X86_64_Win64 ||
2330 CC == CallingConv::X86_64_SysV);
2331 }
2333 bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
2334 if (!CI->isTailCall() || getTargetMachine().Options.DisableTailCalls)
2335 return false;
2337 CallSite CS(CI);
2338 CallingConv::ID CalleeCC = CS.getCallingConv();
2339 if (!IsTailCallConvention(CalleeCC) && !IsCCallConvention(CalleeCC))
2340 return false;
2342 return true;
2343 }
2345 /// Return true if the function is being made into
2346 /// a tailcall target by changing its ABI.
2347 static bool FuncIsMadeTailCallSafe(CallingConv::ID CC,
2348 bool GuaranteedTailCallOpt) {
2349 return GuaranteedTailCallOpt && IsTailCallConvention(CC);
2350 }
2352 SDValue
2353 X86TargetLowering::LowerMemArgument(SDValue Chain,
2354 CallingConv::ID CallConv,
2355 const SmallVectorImpl<ISD::InputArg> &Ins,
2356 SDLoc dl, SelectionDAG &DAG,
2357 const CCValAssign &VA,
2358 MachineFrameInfo *MFI,
2359 unsigned i) const {
2360 // Create the nodes corresponding to a load from this parameter slot.
2361 ISD::ArgFlagsTy Flags = Ins[i].Flags;
2362 bool AlwaysUseMutable = FuncIsMadeTailCallSafe(
2363 CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
2364 bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
2365 EVT ValVT;
2367 // If value is passed by pointer we have address passed instead of the value
2368 // itself.
2369 if (VA.getLocInfo() == CCValAssign::Indirect)
2370 ValVT = VA.getLocVT();
2371 else
2372 ValVT = VA.getValVT();
2374 // FIXME: For now, all byval parameter objects are marked mutable. This can be
2375 // changed with more analysis.
2376 // In case of tail call optimization mark all arguments mutable. Since they
2377 // could be overwritten by lowering of arguments in case of a tail call.
2378 if (Flags.isByVal()) {
2379 unsigned Bytes = Flags.getByValSize();
2380 if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
2381 int FI = MFI->CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable);
2382 return DAG.getFrameIndex(FI, getPointerTy());
2383 } else {
2384 int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8,
2385 VA.getLocMemOffset(), isImmutable);
2386 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
2387 return DAG.getLoad(ValVT, dl, Chain, FIN,
2388 MachinePointerInfo::getFixedStack(FI),
2389 false, false, false, 0);
2390 }
2391 }
2393 // FIXME: Get this from tablegen.
2394 static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
2395 const X86Subtarget *Subtarget) {
2396 assert(Subtarget->is64Bit());
2398 if (Subtarget->isCallingConvWin64(CallConv)) {
2399 static const MCPhysReg GPR64ArgRegsWin64[] = {
2400 X86::RCX, X86::RDX, X86::R8, X86::R9
2401 };
2402 return makeArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));
2403 }
2405 static const MCPhysReg GPR64ArgRegs64Bit[] = {
2406 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
2407 };
2408 return makeArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));
2409 }
2411 // FIXME: Get this from tablegen.
2412 static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
2413 CallingConv::ID CallConv,
2414 const X86Subtarget *Subtarget) {
2415 assert(Subtarget->is64Bit());
2416 if (Subtarget->isCallingConvWin64(CallConv)) {
2417 // The XMM registers which might contain var arg parameters are shadowed
2418 // in their paired GPR. So we only need to save the GPR to their home
2419 // slots.
2420 // TODO: __vectorcall will change this.
2421 return None;
2422 }
2424 const Function *Fn = MF.getFunction();
2425 bool NoImplicitFloatOps = Fn->getAttributes().
2426 hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat);
2427 assert(!(MF.getTarget().Options.UseSoftFloat && NoImplicitFloatOps) &&
2428 "SSE register cannot be used when SSE is disabled!");
2429 if (MF.getTarget().Options.UseSoftFloat || NoImplicitFloatOps ||
2430 !Subtarget->hasSSE1())
2431 // Kernel mode asks for SSE to be disabled, so there are no XMM argument
2432 // registers.
2433 return None;
2435 static const MCPhysReg XMMArgRegs64Bit[] = {
2436 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
2437 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
2438 };
2439 return makeArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
2440 }
2442 SDValue
2443 X86TargetLowering::LowerFormalArguments(SDValue Chain,
2444 CallingConv::ID CallConv,
2445 bool isVarArg,
2446 const SmallVectorImpl<ISD::InputArg> &Ins,
2447 SDLoc dl,
2448 SelectionDAG &DAG,
2449 SmallVectorImpl<SDValue> &InVals)
2450 const {
2451 MachineFunction &MF = DAG.getMachineFunction();
2452 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
2454 const Function* Fn = MF.getFunction();
2455 if (Fn->hasExternalLinkage() &&
2456 Subtarget->isTargetCygMing() &&
2457 Fn->getName() == "main")
2458 FuncInfo->setForceFramePointer(true);
2460 MachineFrameInfo *MFI = MF.getFrameInfo();
2461 bool Is64Bit = Subtarget->is64Bit();
2462 bool IsWin64 = Subtarget->isCallingConvWin64(CallConv);
2464 assert(!(isVarArg && IsTailCallConvention(CallConv)) &&
2465 "Var args not supported with calling convention fastcc, ghc or hipe");
2467 // Assign locations to all of the incoming arguments.
2468 SmallVector<CCValAssign, 16> ArgLocs;
2469 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
2471 // Allocate shadow area for Win64
2472 if (IsWin64)
2473 CCInfo.AllocateStack(32, 8);
2475 CCInfo.AnalyzeFormalArguments(Ins, CC_X86);
2477 unsigned LastVal = ~0U;
2478 SDValue ArgValue;
2479 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2480 CCValAssign &VA = ArgLocs[i];
2481 // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
2482 // places.
2483 assert(VA.getValNo() != LastVal &&
2484 "Don't support value assigned to multiple locs yet");
2485 (void)LastVal;
2486 LastVal = VA.getValNo();
2488 if (VA.isRegLoc()) {
2489 EVT RegVT = VA.getLocVT();
2490 const TargetRegisterClass *RC;
2491 if (RegVT == MVT::i32)
2492 RC = &X86::GR32RegClass;
2493 else if (Is64Bit && RegVT == MVT::i64)
2494 RC = &X86::GR64RegClass;
2495 else if (RegVT == MVT::f32)
2496 RC = &X86::FR32RegClass;
2497 else if (RegVT == MVT::f64)
2498 RC = &X86::FR64RegClass;
2499 else if (RegVT.is512BitVector())
2500 RC = &X86::VR512RegClass;
2501 else if (RegVT.is256BitVector())
2502 RC = &X86::VR256RegClass;
2503 else if (RegVT.is128BitVector())
2504 RC = &X86::VR128RegClass;
2505 else if (RegVT == MVT::x86mmx)
2506 RC = &X86::VR64RegClass;
2507 else if (RegVT == MVT::i1)
2508 RC = &X86::VK1RegClass;
2509 else if (RegVT == MVT::v8i1)
2510 RC = &X86::VK8RegClass;
2511 else if (RegVT == MVT::v16i1)
2512 RC = &X86::VK16RegClass;
2513 else if (RegVT == MVT::v32i1)
2514 RC = &X86::VK32RegClass;
2515 else if (RegVT == MVT::v64i1)
2516 RC = &X86::VK64RegClass;
2517 else
2518 llvm_unreachable("Unknown argument type!");
2520 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
2521 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
2523 // If this is an 8 or 16-bit value, it is really passed promoted to 32
2524 // bits. Insert an assert[sz]ext to capture this, then truncate to the
2525 // right size.
2526 if (VA.getLocInfo() == CCValAssign::SExt)
2527 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
2528 DAG.getValueType(VA.getValVT()));
2529 else if (VA.getLocInfo() == CCValAssign::ZExt)
2530 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
2531 DAG.getValueType(VA.getValVT()));
2532 else if (VA.getLocInfo() == CCValAssign::BCvt)
2533 ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
2535 if (VA.isExtInLoc()) {
2536 // Handle MMX values passed in XMM regs.
2537 if (RegVT.isVector())
2538 ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
2539 else
2540 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
2541 }
2542 } else {
2543 assert(VA.isMemLoc());
2544 ArgValue = LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, i);
2545 }
2547 // If value is passed via pointer - do a load.
2548 if (VA.getLocInfo() == CCValAssign::Indirect)
2549 ArgValue = DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue,
2550 MachinePointerInfo(), false, false, false, 0);
2552 InVals.push_back(ArgValue);
2553 }
2555 if (Subtarget->is64Bit() || Subtarget->isTargetKnownWindowsMSVC()) {
2556 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2557 // The x86-64 ABIs require that for returning structs by value we copy
2558 // the sret argument into %rax/%eax (depending on ABI) for the return.
2559 // Win32 requires us to put the sret argument to %eax as well.
2560 // Save the argument into a virtual register so that we can access it
2561 // from the return points.
2562 if (Ins[i].Flags.isSRet()) {
2563 unsigned Reg = FuncInfo->getSRetReturnReg();
2564 if (!Reg) {
2565 MVT PtrTy = getPointerTy();
2566 Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
2567 FuncInfo->setSRetReturnReg(Reg);
2568 }
2569 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[i]);
2570 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
2571 break;
2572 }
2573 }
2574 }
2576 unsigned StackSize = CCInfo.getNextStackOffset();
2577 // Align stack specially for tail calls.
2578 if (FuncIsMadeTailCallSafe(CallConv,
2579 MF.getTarget().Options.GuaranteedTailCallOpt))
2580 StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
2582 // If the function takes variable number of arguments, make a frame index for
2583 // the start of the first vararg value... for expansion of llvm.va_start. We
2584 // can skip this if there are no va_start calls.
2585 if (MFI->hasVAStart() &&
2586 (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
2587 CallConv != CallingConv::X86_ThisCall))) {
2588 FuncInfo->setVarArgsFrameIndex(
2589 MFI->CreateFixedObject(1, StackSize, true));
2590 }
2592 // Figure out if XMM registers are in use.
2593 assert(!(MF.getTarget().Options.UseSoftFloat &&
2594 Fn->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
2595 Attribute::NoImplicitFloat)) &&
2596 "SSE register cannot be used when SSE is disabled!");
2598 // 64-bit calling conventions support varargs and register parameters, so we
2599 // have to do extra work to spill them in the prologue.
2600 if (Is64Bit && isVarArg && MFI->hasVAStart()) {
2601 // Find the first unallocated argument registers.
2602 ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
2603 ArrayRef<MCPhysReg> ArgXMMs = get64BitArgumentXMMs(MF, CallConv, Subtarget);
2604 unsigned NumIntRegs =
2605 CCInfo.getFirstUnallocated(ArgGPRs.data(), ArgGPRs.size());
2606 unsigned NumXMMRegs =
2607 CCInfo.getFirstUnallocated(ArgXMMs.data(), ArgXMMs.size());
2608 assert(!(NumXMMRegs && !Subtarget->hasSSE1()) &&
2609 "SSE register cannot be used when SSE is disabled!");
2611 // Gather all the live in physical registers.
2612 SmallVector<SDValue, 6> LiveGPRs;
2613 SmallVector<SDValue, 8> LiveXMMRegs;
2614 SDValue ALVal;
2615 for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
2616 unsigned GPR = MF.addLiveIn(Reg, &X86::GR64RegClass);
2617 LiveGPRs.push_back(
2618 DAG.getCopyFromReg(Chain, dl, GPR, MVT::i64));
2619 }
2620 if (!ArgXMMs.empty()) {
2621 unsigned AL = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
2622 ALVal = DAG.getCopyFromReg(Chain, dl, AL, MVT::i8);
2623 for (MCPhysReg Reg : ArgXMMs.slice(NumXMMRegs)) {
2624 unsigned XMMReg = MF.addLiveIn(Reg, &X86::VR128RegClass);
2625 LiveXMMRegs.push_back(
2626 DAG.getCopyFromReg(Chain, dl, XMMReg, MVT::v4f32));
2627 }
2628 }
2630 if (IsWin64) {
2631 const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
2632 // Get to the caller-allocated home save location. Add 8 to account
2633 // for the return address.
2634 int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
2635 FuncInfo->setRegSaveFrameIndex(
2636 MFI->CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
2637 // Fixup to set vararg frame on shadow area (4 x i64).
2638 if (NumIntRegs < 4)
2639 FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
2640 } else {
2641 // For X86-64, if there are vararg parameters that are passed via
2642 // registers, then we must store them to their spots on the stack so
2643 // they may be loaded by deferencing the result of va_next.
2644 FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
2645 FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
2646 FuncInfo->setRegSaveFrameIndex(MFI->CreateStackObject(
2647 ArgGPRs.size() * 8 + ArgXMMs.size() * 16, 16, false));
2648 }
2650 // Store the integer parameter registers.
2651 SmallVector<SDValue, 8> MemOps;
2652 SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
2653 getPointerTy());
2654 unsigned Offset = FuncInfo->getVarArgsGPOffset();
2655 for (SDValue Val : LiveGPRs) {
2656 SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN,
2657 DAG.getIntPtrConstant(Offset));
2658 SDValue Store =
2659 DAG.getStore(Val.getValue(1), dl, Val, FIN,
2660 MachinePointerInfo::getFixedStack(
2661 FuncInfo->getRegSaveFrameIndex(), Offset),
2662 false, false, 0);
2663 MemOps.push_back(Store);
2664 Offset += 8;
2665 }
2667 if (!ArgXMMs.empty() && NumXMMRegs != ArgXMMs.size()) {
2668 // Now store the XMM (fp + vector) parameter registers.
2669 SmallVector<SDValue, 12> SaveXMMOps;
2670 SaveXMMOps.push_back(Chain);
2671 SaveXMMOps.push_back(ALVal);
2672 SaveXMMOps.push_back(DAG.getIntPtrConstant(
2673 FuncInfo->getRegSaveFrameIndex()));
2674 SaveXMMOps.push_back(DAG.getIntPtrConstant(
2675 FuncInfo->getVarArgsFPOffset()));
2676 SaveXMMOps.insert(SaveXMMOps.end(), LiveXMMRegs.begin(),
2677 LiveXMMRegs.end());
2678 MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl,
2679 MVT::Other, SaveXMMOps));
2680 }
2682 if (!MemOps.empty())
2683 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
2684 }
2686 if (isVarArg && MFI->hasMustTailInVarArgFunc()) {
2687 // Find the largest legal vector type.
2688 MVT VecVT = MVT::Other;
2689 // FIXME: Only some x86_32 calling conventions support AVX512.
2690 if (Subtarget->hasAVX512() &&
2691 (Is64Bit || (CallConv == CallingConv::X86_VectorCall ||
2692 CallConv == CallingConv::Intel_OCL_BI)))
2693 VecVT = MVT::v16f32;
2694 else if (Subtarget->hasAVX())
2695 VecVT = MVT::v8f32;
2696 else if (Subtarget->hasSSE2())
2697 VecVT = MVT::v4f32;
2699 // We forward some GPRs and some vector types.
2700 SmallVector<MVT, 2> RegParmTypes;
2701 MVT IntVT = Is64Bit ? MVT::i64 : MVT::i32;
2702 RegParmTypes.push_back(IntVT);
2703 if (VecVT != MVT::Other)
2704 RegParmTypes.push_back(VecVT);
2706 // Compute the set of forwarded registers. The rest are scratch.
2707 SmallVectorImpl<ForwardedRegister> &Forwards =
2708 FuncInfo->getForwardedMustTailRegParms();
2709 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
2711 // Conservatively forward AL on x86_64, since it might be used for varargs.
2712 if (Is64Bit && !CCInfo.isAllocated(X86::AL)) {
2713 unsigned ALVReg = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
2714 Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
2715 }
2717 // Copy all forwards from physical to virtual registers.
2718 for (ForwardedRegister &F : Forwards) {
2719 // FIXME: Can we use a less constrained schedule?
2720 SDValue RegVal = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
2721 F.VReg = MF.getRegInfo().createVirtualRegister(getRegClassFor(F.VT));
2722 Chain = DAG.getCopyToReg(Chain, dl, F.VReg, RegVal);
2723 }
2724 }
2726 // Some CCs need callee pop.
2727 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
2728 MF.getTarget().Options.GuaranteedTailCallOpt)) {
2729 FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
2730 } else {
2731 FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
2732 // If this is an sret function, the return should pop the hidden pointer.
2733 if (!Is64Bit && !IsTailCallConvention(CallConv) &&
2734 !Subtarget->getTargetTriple().isOSMSVCRT() &&
2735 argsAreStructReturn(Ins) == StackStructReturn)
2736 FuncInfo->setBytesToPopOnReturn(4);
2737 }
2739 if (!Is64Bit) {
2740 // RegSaveFrameIndex is X86-64 only.
2741 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
2742 if (CallConv == CallingConv::X86_FastCall ||
2743 CallConv == CallingConv::X86_ThisCall)
2744 // fastcc functions can't have varargs.
2745 FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
2746 }
2748 FuncInfo->setArgumentStackSize(StackSize);
2750 return Chain;
2751 }
2753 SDValue
2754 X86TargetLowering::LowerMemOpCallTo(SDValue Chain,
2755 SDValue StackPtr, SDValue Arg,
2756 SDLoc dl, SelectionDAG &DAG,
2757 const CCValAssign &VA,
2758 ISD::ArgFlagsTy Flags) const {
2759 unsigned LocMemOffset = VA.getLocMemOffset();
2760 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
2761 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
2762 if (Flags.isByVal())
2763 return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
2765 return DAG.getStore(Chain, dl, Arg, PtrOff,
2766 MachinePointerInfo::getStack(LocMemOffset),
2767 false, false, 0);
2768 }
2770 /// Emit a load of return address if tail call
2771 /// optimization is performed and it is required.
2772 SDValue
2773 X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG,
2774 SDValue &OutRetAddr, SDValue Chain,
2775 bool IsTailCall, bool Is64Bit,
2776 int FPDiff, SDLoc dl) const {
2777 // Adjust the Return address stack slot.
2778 EVT VT = getPointerTy();
2779 OutRetAddr = getReturnAddressFrameIndex(DAG);
2781 // Load the "old" Return address.
2782 OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo(),
2783 false, false, false, 0);
2784 return SDValue(OutRetAddr.getNode(), 1);
2785 }
2787 /// Emit a store of the return address if tail call
2788 /// optimization is performed and it is required (FPDiff!=0).
2789 static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
2790 SDValue Chain, SDValue RetAddrFrIdx,
2791 EVT PtrVT, unsigned SlotSize,
2792 int FPDiff, SDLoc dl) {
2793 // Store the return address to the appropriate stack slot.
2794 if (!FPDiff) return Chain;
2795 // Calculate the new stack slot for the return address.
2796 int NewReturnAddrFI =
2797 MF.getFrameInfo()->CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
2798 false);
2799 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
2800 Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
2801 MachinePointerInfo::getFixedStack(NewReturnAddrFI),
2802 false, false, 0);
2803 return Chain;
2804 }
2806 SDValue
2807 X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
2808 SmallVectorImpl<SDValue> &InVals) const {
2809 SelectionDAG &DAG = CLI.DAG;
2810 SDLoc &dl = CLI.DL;
2811 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
2812 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2813 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
2814 SDValue Chain = CLI.Chain;
2815 SDValue Callee = CLI.Callee;
2816 CallingConv::ID CallConv = CLI.CallConv;
2817 bool &isTailCall = CLI.IsTailCall;
2818 bool isVarArg = CLI.IsVarArg;
2820 MachineFunction &MF = DAG.getMachineFunction();
2821 bool Is64Bit = Subtarget->is64Bit();
2822 bool IsWin64 = Subtarget->isCallingConvWin64(CallConv);
2823 StructReturnType SR = callIsStructReturn(Outs);
2824 bool IsSibcall = false;
2825 X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
2827 if (MF.getTarget().Options.DisableTailCalls)
2828 isTailCall = false;
2830 bool IsMustTail = CLI.CS && CLI.CS->isMustTailCall();
2831 if (IsMustTail) {
2832 // Force this to be a tail call. The verifier rules are enough to ensure
2833 // that we can lower this successfully without moving the return address
2834 // around.
2835 isTailCall = true;
2836 } else if (isTailCall) {
2837 // Check if it's really possible to do a tail call.
2838 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
2839 isVarArg, SR != NotStructReturn,
2840 MF.getFunction()->hasStructRetAttr(), CLI.RetTy,
2841 Outs, OutVals, Ins, DAG);
2843 // Sibcalls are automatically detected tailcalls which do not require
2844 // ABI changes.
2845 if (!MF.getTarget().Options.GuaranteedTailCallOpt && isTailCall)
2846 IsSibcall = true;
2848 if (isTailCall)
2849 ++NumTailCalls;
2850 }
2852 assert(!(isVarArg && IsTailCallConvention(CallConv)) &&
2853 "Var args not supported with calling convention fastcc, ghc or hipe");
2855 // Analyze operands of the call, assigning locations to each operand.
2856 SmallVector<CCValAssign, 16> ArgLocs;
2857 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
2859 // Allocate shadow area for Win64
2860 if (IsWin64)
2861 CCInfo.AllocateStack(32, 8);
2863 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
2865 // Get a count of how many bytes are to be pushed on the stack.
2866 unsigned NumBytes = CCInfo.getNextStackOffset();
2867 if (IsSibcall)
2868 // This is a sibcall. The memory operands are available in caller's
2869 // own caller's stack.
2870 NumBytes = 0;
2871 else if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2872 IsTailCallConvention(CallConv))
2873 NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
2875 int FPDiff = 0;
2876 if (isTailCall && !IsSibcall && !IsMustTail) {
2877 // Lower arguments at fp - stackoffset + fpdiff.
2878 unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
2880 FPDiff = NumBytesCallerPushed - NumBytes;
2882 // Set the delta of movement of the returnaddr stackslot.
2883 // But only set if delta is greater than previous delta.
2884 if (FPDiff < X86Info->getTCReturnAddrDelta())
2885 X86Info->setTCReturnAddrDelta(FPDiff);
2886 }
2888 unsigned NumBytesToPush = NumBytes;
2889 unsigned NumBytesToPop = NumBytes;
2891 // If we have an inalloca argument, all stack space has already been allocated
2892 // for us and be right at the top of the stack. We don't support multiple
2893 // arguments passed in memory when using inalloca.
2894 if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
2895 NumBytesToPush = 0;
2896 if (!ArgLocs.back().isMemLoc())
2897 report_fatal_error("cannot use inalloca attribute on a register "
2898 "parameter");
2899 if (ArgLocs.back().getLocMemOffset() != 0)
2900 report_fatal_error("any parameter with the inalloca attribute must be "
2901 "the only memory argument");
2902 }
2904 if (!IsSibcall)
2905 Chain = DAG.getCALLSEQ_START(
2906 Chain, DAG.getIntPtrConstant(NumBytesToPush, true), dl);
2908 SDValue RetAddrFrIdx;
2909 // Load return address for tail calls.
2910 if (isTailCall && FPDiff)
2911 Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
2912 Is64Bit, FPDiff, dl);
2914 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
2915 SmallVector<SDValue, 8> MemOpChains;
2916 SDValue StackPtr;
2918 // Walk the register/memloc assignments, inserting copies/loads. In the case
2919 // of tail call optimization arguments are handle later.
2920 const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
2921 DAG.getSubtarget().getRegisterInfo());
2922 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2923 // Skip inalloca arguments, they have already been written.
2924 ISD::ArgFlagsTy Flags = Outs[i].Flags;
2925 if (Flags.isInAlloca())
2926 continue;
2928 CCValAssign &VA = ArgLocs[i];
2929 EVT RegVT = VA.getLocVT();
2930 SDValue Arg = OutVals[i];
2931 bool isByVal = Flags.isByVal();
2933 // Promote the value if needed.
2934 switch (VA.getLocInfo()) {
2935 default: llvm_unreachable("Unknown loc info!");
2936 case CCValAssign::Full: break;
2937 case CCValAssign::SExt:
2938 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
2939 break;
2940 case CCValAssign::ZExt:
2941 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
2942 break;
2943 case CCValAssign::AExt:
2944 if (RegVT.is128BitVector()) {
2945 // Special case: passing MMX values in XMM registers.
2946 Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
2947 Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
2948 Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
2949 } else
2950 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
2951 break;
2952 case CCValAssign::BCvt:
2953 Arg = DAG.getNode(ISD::BITCAST, dl, RegVT, Arg);
2954 break;
2955 case CCValAssign::Indirect: {
2956 // Store the argument.
2957 SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
2958 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
2959 Chain = DAG.getStore(Chain, dl, Arg, SpillSlot,
2960 MachinePointerInfo::getFixedStack(FI),
2961 false, false, 0);
2962 Arg = SpillSlot;
2963 break;
2964 }
2965 }
2967 if (VA.isRegLoc()) {
2968 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
2969 if (isVarArg && IsWin64) {
2970 // Win64 ABI requires argument XMM reg to be copied to the corresponding
2971 // shadow reg if callee is a varargs function.
2972 unsigned ShadowReg = 0;
2973 switch (VA.getLocReg()) {
2974 case X86::XMM0: ShadowReg = X86::RCX; break;
2975 case X86::XMM1: ShadowReg = X86::RDX; break;
2976 case X86::XMM2: ShadowReg = X86::R8; break;
2977 case X86::XMM3: ShadowReg = X86::R9; break;
2978 }
2979 if (ShadowReg)
2980 RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
2981 }
2982 } else if (!IsSibcall && (!isTailCall || isByVal)) {
2983 assert(VA.isMemLoc());
2984 if (!StackPtr.getNode())
2985 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
2986 getPointerTy());
2987 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
2988 dl, DAG, VA, Flags));
2989 }
2990 }
2992 if (!MemOpChains.empty())
2993 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
2995 if (Subtarget->isPICStyleGOT()) {
2996 // ELF / PIC requires GOT in the EBX register before function calls via PLT
2997 // GOT pointer.
2998 if (!isTailCall) {
2999 RegsToPass.push_back(std::make_pair(unsigned(X86::EBX),
3000 DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), getPointerTy())));
3001 } else {
3002 // If we are tail calling and generating PIC/GOT style code load the
3003 // address of the callee into ECX. The value in ecx is used as target of
3004 // the tail jump. This is done to circumvent the ebx/callee-saved problem
3005 // for tail calls on PIC/GOT architectures. Normally we would just put the
3006 // address of GOT into ebx and then call target@PLT. But for tail calls
3007 // ebx would be restored (since ebx is callee saved) before jumping to the
3008 // target@PLT.
3010 // Note: The actual moving to ECX is done further down.
3011 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
3012 if (G && !G->getGlobal()->hasHiddenVisibility() &&
3013 !G->getGlobal()->hasProtectedVisibility())
3014 Callee = LowerGlobalAddress(Callee, DAG);
3015 else if (isa<ExternalSymbolSDNode>(Callee))
3016 Callee = LowerExternalSymbol(Callee, DAG);
3017 }
3018 }
3020 if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail) {
3021 // From AMD64 ABI document:
3022 // For calls that may call functions that use varargs or stdargs
3023 // (prototype-less calls or calls to functions containing ellipsis (...) in
3024 // the declaration) %al is used as hidden argument to specify the number
3025 // of SSE registers used. The contents of %al do not need to match exactly
3026 // the number of registers, but must be an ubound on the number of SSE
3027 // registers used and is in the range 0 - 8 inclusive.
3029 // Count the number of XMM registers allocated.
3030 static const MCPhysReg XMMArgRegs[] = {
3031 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3032 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3033 };
3034 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
3035 assert((Subtarget->hasSSE1() || !NumXMMRegs)
3036 && "SSE registers cannot be used when SSE is disabled");
3038 RegsToPass.push_back(std::make_pair(unsigned(X86::AL),
3039 DAG.getConstant(NumXMMRegs, MVT::i8)));
3040 }
3042 if (isVarArg && IsMustTail) {
3043 const auto &Forwards = X86Info->getForwardedMustTailRegParms();
3044 for (const auto &F : Forwards) {
3045 SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
3046 RegsToPass.push_back(std::make_pair(unsigned(F.PReg), Val));
3047 }
3048 }
3050 // For tail calls lower the arguments to the 'real' stack slots. Sibcalls
3051 // don't need this because the eligibility check rejects calls that require
3052 // shuffling arguments passed in memory.
3053 if (!IsSibcall && isTailCall) {
3054 // Force all the incoming stack arguments to be loaded from the stack
3055 // before any new outgoing arguments are stored to the stack, because the
3056 // outgoing stack slots may alias the incoming argument stack slots, and
3057 // the alias isn't otherwise explicit. This is slightly more conservative
3058 // than necessary, because it means that each store effectively depends
3059 // on every argument instead of just those arguments it would clobber.
3060 SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
3062 SmallVector<SDValue, 8> MemOpChains2;
3063 SDValue FIN;
3064 int FI = 0;
3065 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3066 CCValAssign &VA = ArgLocs[i];
3067 if (VA.isRegLoc())
3068 continue;
3069 assert(VA.isMemLoc());
3070 SDValue Arg = OutVals[i];
3071 ISD::ArgFlagsTy Flags = Outs[i].Flags;
3072 // Skip inalloca arguments. They don't require any work.
3073 if (Flags.isInAlloca())
3074 continue;
3075 // Create frame index.
3076 int32_t Offset = VA.getLocMemOffset()+FPDiff;
3077 uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
3078 FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
3079 FIN = DAG.getFrameIndex(FI, getPointerTy());
3081 if (Flags.isByVal()) {
3082 // Copy relative to framepointer.
3083 SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset());
3084 if (!StackPtr.getNode())
3085 StackPtr = DAG.getCopyFromReg(Chain, dl,
3086 RegInfo->getStackRegister(),
3087 getPointerTy());
3088 Source = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, Source);
3090 MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
3091 ArgChain,
3092 Flags, DAG, dl));
3093 } else {
3094 // Store relative to framepointer.
3095 MemOpChains2.push_back(
3096 DAG.getStore(ArgChain, dl, Arg, FIN,
3097 MachinePointerInfo::getFixedStack(FI),
3098 false, false, 0));
3099 }
3100 }
3102 if (!MemOpChains2.empty())
3103 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
3105 // Store the return address to the appropriate stack slot.
3106 Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
3107 getPointerTy(), RegInfo->getSlotSize(),
3108 FPDiff, dl);
3109 }
3111 // Build a sequence of copy-to-reg nodes chained together with token chain
3112 // and flag operands which copy the outgoing args into registers.
3113 SDValue InFlag;
3114 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
3115 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
3116 RegsToPass[i].second, InFlag);
3117 InFlag = Chain.getValue(1);
3118 }
3120 if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
3121 assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
3122 // In the 64-bit large code model, we have to make all calls
3123 // through a register, since the call instruction's 32-bit
3124 // pc-relative offset may not be large enough to hold the whole
3125 // address.
3126 } else if (Callee->getOpcode() == ISD::GlobalAddress) {
3127 // If the callee is a GlobalAddress node (quite common, every direct call
3128 // is) turn it into a TargetGlobalAddress node so that legalize doesn't hack
3129 // it.
3130 GlobalAddressSDNode* G = cast<GlobalAddressSDNode>(Callee);
3132 // We should use extra load for direct calls to dllimported functions in
3133 // non-JIT mode.
3134 const GlobalValue *GV = G->getGlobal();
3135 if (!GV->hasDLLImportStorageClass()) {
3136 unsigned char OpFlags = 0;
3137 bool ExtraLoad = false;
3138 unsigned WrapperKind = ISD::DELETED_NODE;
3140 // On ELF targets, in both X86-64 and X86-32 mode, direct calls to
3141 // external symbols most go through the PLT in PIC mode. If the symbol
3142 // has hidden or protected visibility, or if it is static or local, then
3143 // we don't need to use the PLT - we can directly call it.
3144 if (Subtarget->isTargetELF() &&
3145 DAG.getTarget().getRelocationModel() == Reloc::PIC_ &&
3146 GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
3147 OpFlags = X86II::MO_PLT;
3148 } else if (Subtarget->isPICStyleStubAny() &&
3149 (GV->isDeclaration() || GV->isWeakForLinker()) &&
3150 (!Subtarget->getTargetTriple().isMacOSX() ||
3151 Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
3152 // PC-relative references to external symbols should go through $stub,
3153 // unless we're building with the leopard linker or later, which
3154 // automatically synthesizes these stubs.
3155 OpFlags = X86II::MO_DARWIN_STUB;
3156 } else if (Subtarget->isPICStyleRIPRel() &&
3157 isa<Function>(GV) &&
3158 cast<Function>(GV)->getAttributes().
3159 hasAttribute(AttributeSet::FunctionIndex,
3160 Attribute::NonLazyBind)) {
3161 // If the function is marked as non-lazy, generate an indirect call
3162 // which loads from the GOT directly. This avoids runtime overhead
3163 // at the cost of eager binding (and one extra byte of encoding).
3164 OpFlags = X86II::MO_GOTPCREL;
3165 WrapperKind = X86ISD::WrapperRIP;
3166 ExtraLoad = true;
3167 }
3169 Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(),
3170 G->getOffset(), OpFlags);
3172 // Add a wrapper if needed.
3173 if (WrapperKind != ISD::DELETED_NODE)
3174 Callee = DAG.getNode(X86ISD::WrapperRIP, dl, getPointerTy(), Callee);
3175 // Add extra indirection if needed.
3176 if (ExtraLoad)
3177 Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Callee,
3178 MachinePointerInfo::getGOT(),
3179 false, false, false, 0);
3180 }
3181 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
3182 unsigned char OpFlags = 0;
3184 // On ELF targets, in either X86-64 or X86-32 mode, direct calls to
3185 // external symbols should go through the PLT.
3186 if (Subtarget->isTargetELF() &&
3187 DAG.getTarget().getRelocationModel() == Reloc::PIC_) {
3188 OpFlags = X86II::MO_PLT;
3189 } else if (Subtarget->isPICStyleStubAny() &&
3190 (!Subtarget->getTargetTriple().isMacOSX() ||
3191 Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
3192 // PC-relative references to external symbols should go through $stub,
3193 // unless we're building with the leopard linker or later, which
3194 // automatically synthesizes these stubs.
3195 OpFlags = X86II::MO_DARWIN_STUB;
3196 }
3198 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(),
3199 OpFlags);
3200 } else if (Subtarget->isTarget64BitILP32() && Callee->getValueType(0) == MVT::i32) {
3201 // Zero-extend the 32-bit Callee address into a 64-bit according to x32 ABI
3202 Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
3203 }
3205 // Returns a chain & a flag for retval copy to use.
3206 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
3207 SmallVector<SDValue, 8> Ops;
3209 if (!IsSibcall && isTailCall) {
3210 Chain = DAG.getCALLSEQ_END(Chain,
3211 DAG.getIntPtrConstant(NumBytesToPop, true),
3212 DAG.getIntPtrConstant(0, true), InFlag, dl);
3213 InFlag = Chain.getValue(1);
3214 }
3216 Ops.push_back(Chain);
3217 Ops.push_back(Callee);
3219 if (isTailCall)
3220 Ops.push_back(DAG.getConstant(FPDiff, MVT::i32));
3222 // Add argument registers to the end of the list so that they are known live
3223 // into the call.
3224 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
3225 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
3226 RegsToPass[i].second.getValueType()));
3228 // Add a register mask operand representing the call-preserved registers.
3229 const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo();
3230 const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
3231 assert(Mask && "Missing call preserved mask for calling convention");
3232 Ops.push_back(DAG.getRegisterMask(Mask));
3234 if (InFlag.getNode())
3235 Ops.push_back(InFlag);
3237 if (isTailCall) {
3238 // We used to do:
3239 //// If this is the first return lowered for this function, add the regs
3240 //// to the liveout set for the function.
3241 // This isn't right, although it's probably harmless on x86; liveouts
3242 // should be computed from returns not tail calls. Consider a void
3243 // function making a tail call to a function returning int.