Enable unaligned memory access on PPC for scalar types

author Hal Finkel <hfinkel@anl.gov>

Fri, 15 Mar 2013 15:27:13 +0000 (15:27 +0000)

committer Hal Finkel <hfinkel@anl.gov>

Fri, 15 Mar 2013 15:27:13 +0000 (15:27 +0000)
author Hal Finkel <hfinkel@anl.gov>
Fri, 15 Mar 2013 15:27:13 +0000 (15:27 +0000)
committer Hal Finkel <hfinkel@anl.gov>
Fri, 15 Mar 2013 15:27:13 +0000 (15:27 +0000)
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp

index 13cb358fc028f01dc33fbadb2badc80af1cdc09e..a7e9d56fa9d6b402f9e1f6095989d1e89206176d 100644 (file)
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -57,6 +57,9 @@ cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
  static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
  cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
  
+static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
+cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
+
  static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) {
    if (TM.getSubtargetImpl()->isDarwin())
      return new TargetLoweringObjectFileMachO();
@@ -6851,6 +6854,32 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
    }
  }
  
+bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
+                                                      bool *Fast) const {
+  if (DisablePPCUnaligned) // -disable-ppc-unaligned: never allow.
+    return false;
+
+  // PowerPC supports unaligned memory access for simple non-vector types.
+  // An unaligned access is not as efficient as an aligned one, but it is
+  // generally more efficient than expanding the access manually, and it
+  // generally only traps to software emulation when it crosses a page
+  // boundary.
+
+  if (!VT.isSimple())
+    return false;
+
+  if (VT.getSimpleVT().isVector())
+    return false;
+
+  if (VT == MVT::ppcf128) // ppcf128 is excluded as well.
+    return false;
+
+  if (Fast) // Optional out-parameter; written only when access is allowed.
+    *Fast = true;
+
+  return true;
+}
+
  /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
  /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
  /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h

index 3931384d8901ba588d2b6cdbef8ed891f341de93..8d44d9ff46b00919892a3357f14f98b8d1a6b726 100644 (file)
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -449,6 +449,10 @@ namespace llvm {
                          bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
                          MachineFunction &MF) const;
  
+    /// Return true if unaligned memory access is allowed for type VT; if so
+    /// and Fast is non-null, also set *Fast (fast vs. software emulation).
+    virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast = 0) const;
+
      /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
      /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
      /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
diff --git a/test/CodeGen/PowerPC/lbzux.ll b/test/CodeGen/PowerPC/lbzux.ll

index 12f1d1f130d86c9b6d41b0c59206accf1785b267..98951306fd8e9ff1418266f4390af2a495d75938 100644 (file)
--- a/test/CodeGen/PowerPC/lbzux.ll
+++ b/test/CodeGen/PowerPC/lbzux.ll
@@ -1,6 +1,6 @@
  target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
  target triple = "powerpc64-unknown-linux-gnu"
-; RUN: llc < %s | FileCheck %s
+; RUN: llc -disable-ppc-unaligned < %s | FileCheck %s
  
  define fastcc void @allocateSpace(i1 %cond1, i1 %cond2) nounwind {
  entry:
diff --git a/test/CodeGen/PowerPC/structsinmem.ll b/test/CodeGen/PowerPC/structsinmem.ll

index 884d3a89d15aa21aab7fa37fef1a1ce3ee498a81..59b108a8144c696d194cf6edd3e6f0a772bd6018 100644 (file)
--- a/test/CodeGen/PowerPC/structsinmem.ll
+++ b/test/CodeGen/PowerPC/structsinmem.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mcpu=pwr7 -O0 -disable-fp-elim < %s | FileCheck %s
+; RUN: llc -mcpu=pwr7 -O0 -disable-fp-elim -disable-ppc-unaligned < %s | FileCheck %s
  
  ; FIXME: The code generation for packed structs is very poor because the
  ; PowerPC target wrongly rejects all unaligned loads.  This test case will
diff --git a/test/CodeGen/PowerPC/structsinregs.ll b/test/CodeGen/PowerPC/structsinregs.ll

index c7cbfa8ee6ace94a2c2b27fd4959d54a3d6633d4..c746039f43543d9d19e8e7c147022a76bb4e1ce8 100644 (file)
--- a/test/CodeGen/PowerPC/structsinregs.ll
+++ b/test/CodeGen/PowerPC/structsinregs.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mcpu=pwr7 -O0 -disable-fp-elim < %s | FileCheck %s
+; RUN: llc -mcpu=pwr7 -O0 -disable-fp-elim -disable-ppc-unaligned < %s | FileCheck %s
  
  ; FIXME: The code generation for packed structs is very poor because the
  ; PowerPC target wrongly rejects all unaligned loads.  This test case will
diff --git a/test/CodeGen/PowerPC/stwu8.ll b/test/CodeGen/PowerPC/stwu8.ll

index 897bfc6d6caaf80922e4f5fe2601c68a96e3c60a..e0bd043454396a21635ee9cecd7c8b2ccce4e949 100644 (file)
--- a/test/CodeGen/PowerPC/stwu8.ll
+++ b/test/CodeGen/PowerPC/stwu8.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s | FileCheck %s
+; RUN: llc -disable-ppc-unaligned < %s | FileCheck %s
  target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
  target triple = "powerpc64-unknown-linux-gnu"
  
diff --git a/test/CodeGen/PowerPC/unaligned.ll b/test/CodeGen/PowerPC/unaligned.ll

new file mode 100644 (file)

index 0000000..d050803
--- /dev/null
+++ b/test/CodeGen/PowerPC/unaligned.ll
@@ -0,0 +1,73 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s
+target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128-n32"
+
+define void @foo1(i16* %p, i16* %r) nounwind {
+entry:
+  %v = load i16* %p, align 1
+  store i16 %v, i16* %r, align 1
+  ret void
+
+; CHECK: @foo1
+; CHECK: lhz
+; CHECK: sth
+}
+
+define void @foo2(i32* %p, i32* %r) nounwind {
+entry:
+  %v = load i32* %p, align 1
+  store i32 %v, i32* %r, align 1
+  ret void
+
+; CHECK: @foo2
+; CHECK: lwz
+; CHECK: stw
+}
+
+define void @foo3(i64* %p, i64* %r) nounwind {
+entry:
+  %v = load i64* %p, align 1
+  store i64 %v, i64* %r, align 1
+  ret void
+
+; CHECK: @foo3
+; CHECK: ld
+; CHECK: std
+}
+
+define void @foo4(float* %p, float* %r) nounwind {
+entry:
+  %v = load float* %p, align 1
+  store float %v, float* %r, align 1
+  ret void
+
+; CHECK: @foo4
+; CHECK: lfs
+; CHECK: stfs
+}
+
+define void @foo5(double* %p, double* %r) nounwind {
+entry:
+  %v = load double* %p, align 1
+  store double %v, double* %r, align 1
+  ret void
+
+; CHECK: @foo5
+; CHECK: lfd
+; CHECK: stfd
+}
+
+define void @foo6(<4 x float>* %p, <4 x float>* %r) nounwind {
+entry:
+  %v = load <4 x float>* %p, align 1
+  store <4 x float> %v, <4 x float>* %r, align 1
+  ret void
+
+; These loads and stores are legalized into aligned loads and stores
+; using aligned stack slots.
+; CHECK: @foo6
+; CHECK: ld
+; CHECK: ld
+; CHECK: std
+; CHECK: std
+}
+
author	Hal Finkel <hfinkel@anl.gov>
	Fri, 15 Mar 2013 15:27:13 +0000 (15:27 +0000)
committer	Hal Finkel <hfinkel@anl.gov>
	Fri, 15 Mar 2013 15:27:13 +0000 (15:27 +0000)
lib/Target/PowerPC/PPCISelLowering.cpp		patch \| blob \| history
lib/Target/PowerPC/PPCISelLowering.h		patch \| blob \| history
test/CodeGen/PowerPC/lbzux.ll		patch \| blob \| history
test/CodeGen/PowerPC/structsinmem.ll		patch \| blob \| history
test/CodeGen/PowerPC/structsinregs.ll		patch \| blob \| history
test/CodeGen/PowerPC/stwu8.ll		patch \| blob \| history
test/CodeGen/PowerPC/unaligned.ll	[new file with mode: 0644]	patch \| blob