summary | shortlog | log | commit | commitdiff | tree
raw | patch | inline | side by side (parent: 19d9f34)
raw | patch | inline | side by side (parent: 19d9f34)
author | Ahmed Bougacha <ahmed.bougacha@gmail.com> | |
Mon, 5 Jan 2015 17:02:28 +0000 (17:02 +0000) | ||
committer | Ahmed Bougacha <ahmed.bougacha@gmail.com> | |
Mon, 5 Jan 2015 17:02:28 +0000 (17:02 +0000) |
For 0-lane stores, we used to generate code similar to:
fmov w8, s0
str w8, [x0, x1, lsl #2]
instead of:
str s0, [x0, x1, lsl #2]
To correct that: for store lane 0 patterns, directly match to STR <subreg>0.
Byte-sized instructions don't have the special case for a 0 index,
because FPR8s are defined to have untyped content.
rdar://16372710
Differential Revision: http://reviews.llvm.org/D6772
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@225181 91177308-0d34-0410-b5e6-96231b3b80d8
fmov w8, s0
str w8, [x0, x1, lsl #2]
instead of:
str s0, [x0, x1, lsl #2]
To correct that: for store lane 0 patterns, directly match to STR <subreg>0.
Byte-sized instructions don't have the special case for a 0 index,
because FPR8s are defined to have untyped content.
rdar://16372710
Differential Revision: http://reviews.llvm.org/D6772
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@225181 91177308-0d34-0410-b5e6-96231b3b80d8
lib/Target/AArch64/AArch64InstrInfo.td | patch | blob | history | |
test/CodeGen/AArch64/arm64-st1.ll | patch | blob | history |
index f4a555499d2f0cc830b0fc68e974a58f0dd42bf4..cae02d0a32e0db0c2cd55baf02618bea48cf73e6 100644 (file)
}
} // AddedComplexity = 10
+// Match stores from lane 0 to the appropriate subreg's store.
+multiclass VecROStoreLane0Pat<ROAddrMode ro, SDPatternOperator storeop,
+ ValueType VecTy, ValueType STy,
+ SubRegIndex SubRegIdx,
+ Instruction STRW, Instruction STRX> {
+
+ def : Pat<(storeop (STy (vector_extract (VecTy VecListOne128:$Vt), 0)),
+ (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
+ (STRW (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx),
+ GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;
+
+ def : Pat<(storeop (STy (vector_extract (VecTy VecListOne128:$Vt), 0)),
+ (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
+ (STRX (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx),
+ GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
+}
+
+let AddedComplexity = 19 in {
+ defm : VecROStoreLane0Pat<ro16, truncstorei16, v8i16, i32, hsub, STRHroW, STRHroX>;
+ defm : VecROStoreLane0Pat<ro16, store , v8i16, i16, hsub, STRHroW, STRHroX>;
+ defm : VecROStoreLane0Pat<ro32, truncstorei32, v4i32, i32, ssub, STRSroW, STRSroX>;
+ defm : VecROStoreLane0Pat<ro32, store , v4i32, i32, ssub, STRSroW, STRSroX>;
+ defm : VecROStoreLane0Pat<ro32, store , v4f32, f32, ssub, STRSroW, STRSroX>;
+ defm : VecROStoreLane0Pat<ro64, store , v2i64, i64, dsub, STRDroW, STRDroX>;
+ defm : VecROStoreLane0Pat<ro64, store , v2f64, f64, dsub, STRDroW, STRDroX>;
+}
+
//---
// (unsigned immediate)
defm STRX : StoreUI<0b11, 0, 0b00, GPR64, uimm12s8, "str",
index 4370484478c0ec987407203084f5de33ad7c64dd..a4818bd8850368fe387ae7dfe0fdc7ab121cf790 100644 (file)
ret void
}
+define void @st1lane0_ro_16b(<16 x i8> %A, i8* %D, i64 %offset) {
+; CHECK-LABEL: st1lane0_ro_16b
+; CHECK: umov.b w[[WREG:[0-9]+]], v0[0]
+; CHECK: strb w[[WREG]], [x0, x1]
+ %ptr = getelementptr i8* %D, i64 %offset
+ %tmp = extractelement <16 x i8> %A, i32 0
+ store i8 %tmp, i8* %ptr
+ ret void
+}
+
define void @st1lane_8h(<8 x i16> %A, i16* %D) {
; CHECK-LABEL: st1lane_8h
; CHECK: st1.h
ret void
}
+define void @st1lane0_ro_8h(<8 x i16> %A, i16* %D, i64 %offset) {
+; CHECK-LABEL: st1lane0_ro_8h
+; CHECK: str h0, [x0, x1, lsl #1]
+ %ptr = getelementptr i16* %D, i64 %offset
+ %tmp = extractelement <8 x i16> %A, i32 0
+ store i16 %tmp, i16* %ptr
+ ret void
+}
+
define void @st1lane_4s(<4 x i32> %A, i32* %D) {
; CHECK-LABEL: st1lane_4s
; CHECK: st1.s
ret void
}
+define void @st1lane0_ro_4s(<4 x i32> %A, i32* %D, i64 %offset) {
+; CHECK-LABEL: st1lane0_ro_4s
+; CHECK: str s0, [x0, x1, lsl #2]
+ %ptr = getelementptr i32* %D, i64 %offset
+ %tmp = extractelement <4 x i32> %A, i32 0
+ store i32 %tmp, i32* %ptr
+ ret void
+}
+
define void @st1lane_4s_float(<4 x float> %A, float* %D) {
; CHECK-LABEL: st1lane_4s_float
; CHECK: st1.s
ret void
}
+define void @st1lane0_ro_4s_float(<4 x float> %A, float* %D, i64 %offset) {
+; CHECK-LABEL: st1lane0_ro_4s_float
+; CHECK: str s0, [x0, x1, lsl #2]
+ %ptr = getelementptr float* %D, i64 %offset
+ %tmp = extractelement <4 x float> %A, i32 0
+ store float %tmp, float* %ptr
+ ret void
+}
+
define void @st1lane_2d(<2 x i64> %A, i64* %D) {
; CHECK-LABEL: st1lane_2d
; CHECK: st1.d
ret void
}
+define void @st1lane0_ro_2d(<2 x i64> %A, i64* %D, i64 %offset) {
+; CHECK-LABEL: st1lane0_ro_2d
+; CHECK: str d0, [x0, x1, lsl #3]
+ %ptr = getelementptr i64* %D, i64 %offset
+ %tmp = extractelement <2 x i64> %A, i32 0
+ store i64 %tmp, i64* %ptr
+ ret void
+}
+
define void @st1lane_2d_double(<2 x double> %A, double* %D) {
; CHECK-LABEL: st1lane_2d_double
; CHECK: st1.d
ret void
}
+define void @st1lane0_ro_2d_double(<2 x double> %A, double* %D, i64 %offset) {
+; CHECK-LABEL: st1lane0_ro_2d_double
+; CHECK: str d0, [x0, x1, lsl #3]
+ %ptr = getelementptr double* %D, i64 %offset
+ %tmp = extractelement <2 x double> %A, i32 0
+ store double %tmp, double* %ptr
+ ret void
+}
+
define void @st1lane_8b(<8 x i8> %A, i8* %D) {
; CHECK-LABEL: st1lane_8b
; CHECK: st1.b
ret void
}
+define void @st1lane0_ro_8b(<8 x i8> %A, i8* %D, i64 %offset) {
+; CHECK-LABEL: st1lane0_ro_8b
+; CHECK: umov.b w[[WREG:[0-9]+]], v0[0]
+; CHECK: strb w[[WREG]], [x0, x1]
+ %ptr = getelementptr i8* %D, i64 %offset
+ %tmp = extractelement <8 x i8> %A, i32 0
+ store i8 %tmp, i8* %ptr
+ ret void
+}
+
define void @st1lane_4h(<4 x i16> %A, i16* %D) {
; CHECK-LABEL: st1lane_4h
; CHECK: st1.h
ret void
}
+define void @st1lane0_ro_4h(<4 x i16> %A, i16* %D, i64 %offset) {
+; CHECK-LABEL: st1lane0_ro_4h
+; CHECK: str h0, [x0, x1, lsl #1]
+ %ptr = getelementptr i16* %D, i64 %offset
+ %tmp = extractelement <4 x i16> %A, i32 0
+ store i16 %tmp, i16* %ptr
+ ret void
+}
+
define void @st1lane_2s(<2 x i32> %A, i32* %D) {
; CHECK-LABEL: st1lane_2s
; CHECK: st1.s
ret void
}
+define void @st1lane0_ro_2s(<2 x i32> %A, i32* %D, i64 %offset) {
+; CHECK-LABEL: st1lane0_ro_2s
+; CHECK: str s0, [x0, x1, lsl #2]
+ %ptr = getelementptr i32* %D, i64 %offset
+ %tmp = extractelement <2 x i32> %A, i32 0
+ store i32 %tmp, i32* %ptr
+ ret void
+}
+
define void @st1lane_2s_float(<2 x float> %A, float* %D) {
; CHECK-LABEL: st1lane_2s_float
; CHECK: st1.s
ret void
}
+define void @st1lane0_ro_2s_float(<2 x float> %A, float* %D, i64 %offset) {
+; CHECK-LABEL: st1lane0_ro_2s_float
+; CHECK: str s0, [x0, x1, lsl #2]
+ %ptr = getelementptr float* %D, i64 %offset
+ %tmp = extractelement <2 x float> %A, i32 0
+ store float %tmp, float* %ptr
+ ret void
+}
+
define void @st2lane_16b(<16 x i8> %A, <16 x i8> %B, i8* %D) {
; CHECK-LABEL: st2lane_16b
; CHECK: st2.b