apple
diff --git a/‎docs/ReleaseNotes.rst
Lines changed: 44 additions & 1 deletion b/‎docs/ReleaseNotes.rst
Lines changed: 44 additions & 1 deletion
diff --git a/‎docs/UndefinedBehaviorSanitizer.rst
Lines changed: 2 additions & 1 deletion b/‎docs/UndefinedBehaviorSanitizer.rst
Lines changed: 2 additions & 1 deletion
diff --git a/‎lib/CodeGen/CGExprScalar.cpp
Lines changed: 99 additions & 50 deletions b/‎lib/CodeGen/CGExprScalar.cpp
Lines changed: 99 additions & 50 deletions
diff --git a/‎test/CodeGen/catch-nullptr-and-nonzero-offset-blacklist.c
Lines changed: 34 additions & 0 deletions b/‎test/CodeGen/catch-nullptr-and-nonzero-offset-blacklist.c
Lines changed: 34 additions & 0 deletions
diff --git a/‎test/CodeGen/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.c
Lines changed: 35 additions & 0 deletions b/‎test/CodeGen/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.c
Lines changed: 35 additions & 0 deletions
@@ -60,6 +60,16 @@ Improvements to Clang's diagnostics
 Non-comprehensive list of changes in this release
 -------------------------------------------------
 
+* In both C and C++ (C17 ``6.5.6p8``, C++ ``[expr.add]``), pointer arithmetic is
+  only permitted within arrays. In particular, the behavior of a program is not
+  defined if it adds a non-zero offset (or in C, any offset) to a null pointer,
+  or if it forms a null pointer by subtracting an integer from a non-null
+  pointer, and the LLVM optimizer now uses those guarantees for transformations.
+  This may lead to unintended behavior in code that performs these operations.
+  The Undefined Behavior Sanitizer ``-fsanitize=pointer-overflow`` check has
+  been extended to detect these cases, so that code relying on them can be
+  detected and fixed.
+
 - For X86 target, -march=skylake-avx512, -march=icelake-client,
   -march=icelake-server, -march=cascadelake, -march=cooperlake will default to
   not using 512-bit zmm registers in vectorized code unless 512-bit intrinsics
@@ -238,7 +248,40 @@ Static Analyzer
 Undefined Behavior Sanitizer (UBSan)
 ------------------------------------
 
-- ...
+- * The ``pointer-overflow`` check was extended added to catch the cases where
+    a non-zero offset is applied to a null pointer, or the result of
+    applying the offset is a null pointer.
+
+    .. code-block:: c++
+
+      #include <cstdint> // for intptr_t
+
+      static char *getelementpointer_inbounds(char *base, unsigned long offset) {
+        // Potentially UB.
+        return base + offset;
+      }
+
+      char *getelementpointer_unsafe(char *base, unsigned long offset) {
+        // Always apply offset. UB if base is ``nullptr`` and ``offset`` is not
+        // zero, or if ``base`` is non-``nullptr`` and ``offset`` is
+        // ``-reinterpret_cast<intptr_t>(base)``.
+        return getelementpointer_inbounds(base, offset);
+      }
+
+      char *getelementpointer_safe(char *base, unsigned long offset) {
+        // Cast pointer to integer, perform usual arithmetic addition,
+        // and cast to pointer. This is legal.
+        char *computed =
+            reinterpret_cast<char *>(reinterpret_cast<intptr_t>(base) + offset);
+        // If either the pointer becomes non-``nullptr``, or becomes
+        // ``nullptr``, we must use ``computed`` result.
+        if (((base == nullptr) && (computed != nullptr)) ||
+            ((base != nullptr) && (computed == nullptr)))
+          return computed;
+        // Else we can use ``getelementpointer_inbounds()``.
+        return getelementpointer_inbounds(base, offset);
+      }
+
 
 Core Analysis Improvements
 ==========================
 
@@ -130,7 +130,8 @@ Available checks are:
      ``__builtin_object_size``, and consequently may be able to detect more
      problems at higher optimization levels.
   -  ``-fsanitize=pointer-overflow``: Performing pointer arithmetic which
-     overflows.
+     overflows, or where either the old or new pointer value is a null pointer
+     (or in C, when they both are).
   -  ``-fsanitize=return``: In C++, reaching the end of a
      value-returning function without returning a value.
   -  ``-fsanitize=returns-nonnull-attribute``: Returning null pointer
 
@@ -4542,18 +4542,36 @@ struct GEPOffsetAndOverflow {
   llvm::Value *OffsetOverflows;
 };
 
-/// Evaluate given GEPVal, which must be an inbounds GEP,
+/// Evaluate given GEPVal, which is either an inbounds GEP, or a constant,
 /// and compute the total offset it applies from it's base pointer BasePtr.
 /// Returns offset in bytes and a boolean flag whether an overflow happened
 /// during evaluation.
 static GEPOffsetAndOverflow EmitGEPOffsetInBytes(Value *BasePtr, Value *GEPVal,
                                                  llvm::LLVMContext &VMContext,
                                                  CodeGenModule &CGM,
                                                  CGBuilderTy Builder) {
+  const auto &DL = CGM.getDataLayout();
+
+  // The total (signed) byte offset for the GEP.
+  llvm::Value *TotalOffset = nullptr;
+
+  // Was the GEP already reduced to a constant?
+  if (isa<llvm::Constant>(GEPVal)) {
+    // Compute the offset by casting both pointers to integers and subtracting:
+    // GEPVal = BasePtr + ptr(Offset) <--> Offset = int(GEPVal) - int(BasePtr)
+    Value *BasePtr_int =
+        Builder.CreatePtrToInt(BasePtr, DL.getIntPtrType(BasePtr->getType()));
+    Value *GEPVal_int =
+        Builder.CreatePtrToInt(GEPVal, DL.getIntPtrType(GEPVal->getType()));
+    TotalOffset = Builder.CreateSub(GEPVal_int, BasePtr_int);
+    return {TotalOffset, /*OffsetOverflows=*/Builder.getFalse()};
+  }
+
   auto *GEP = cast<llvm::GEPOperator>(GEPVal);
+  assert(GEP->getPointerOperand() == BasePtr &&
+         "BasePtr must be the the base of the GEP.");
   assert(GEP->isInBounds() && "Expected inbounds GEP");
 
-  const auto &DL = CGM.getDataLayout();
   auto *IntPtrTy = DL.getIntPtrType(GEP->getPointerOperandType());
 
   // Grab references to the signed add/mul overflow intrinsics for intptr_t.
@@ -4563,8 +4581,6 @@ static GEPOffsetAndOverflow EmitGEPOffsetInBytes(Value *BasePtr, Value *GEPVal,
   auto *SMulIntrinsic =
       CGM.getIntrinsic(llvm::Intrinsic::smul_with_overflow, IntPtrTy);
 
-  // The total (signed) byte offset for the GEP.
-  llvm::Value *TotalOffset = nullptr;
   // The offset overflow flag - true if the total offset overflows.
   llvm::Value *OffsetOverflows = Builder.getFalse();
 
@@ -4635,69 +4651,102 @@ CodeGenFunction::EmitCheckedInBoundsGEP(Value *Ptr, ArrayRef<Value *> IdxList,
   if (!SanOpts.has(SanitizerKind::PointerOverflow))
     return GEPVal;
 
-  // If the GEP has already been reduced to a constant, leave it be.
-  if (isa<llvm::Constant>(GEPVal))
-    return GEPVal;
+  llvm::Type *PtrTy = Ptr->getType();
+
+  // Perform nullptr-and-offset check unless the nullptr is defined.
+  bool PerformNullCheck = !NullPointerIsDefined(
+      Builder.GetInsertBlock()->getParent(), PtrTy->getPointerAddressSpace());
+  // Check for overflows unless the GEP got constant-folded,
+  // and only in the default address space
+  bool PerformOverflowCheck =
+      !isa<llvm::Constant>(GEPVal) && PtrTy->getPointerAddressSpace() == 0;
 
-  // Only check for overflows in the default address space.
-  if (GEPVal->getType()->getPointerAddressSpace())
+  if (!(PerformNullCheck || PerformOverflowCheck))
     return GEPVal;
 
+  const auto &DL = CGM.getDataLayout();
+
   SanitizerScope SanScope(this);
+  llvm::Type *IntPtrTy = DL.getIntPtrType(PtrTy);
 
   GEPOffsetAndOverflow EvaluatedGEP =
       EmitGEPOffsetInBytes(Ptr, GEPVal, getLLVMContext(), CGM, Builder);
 
-  auto *GEP = cast<llvm::GEPOperator>(GEPVal);
-
-  const auto &DL = CGM.getDataLayout();
-  auto *IntPtrTy = DL.getIntPtrType(GEP->getPointerOperandType());
+  assert((!isa<llvm::Constant>(EvaluatedGEP.TotalOffset) ||
+          EvaluatedGEP.OffsetOverflows == Builder.getFalse()) &&
+         "If the offset got constant-folded, we don't expect that there was an "
+         "overflow.");
 
   auto *Zero = llvm::ConstantInt::getNullValue(IntPtrTy);
 
-  // Common case: if the total offset is zero, don't emit a check.
-  if (EvaluatedGEP.TotalOffset == Zero)
+  // Common case: if the total offset is zero, and we are using C++ semantics,
+  // where nullptr+0 is defined, don't emit a check.
+  if (EvaluatedGEP.TotalOffset == Zero && CGM.getLangOpts().CPlusPlus)
     return GEPVal;
 
   // Now that we've computed the total offset, add it to the base pointer (with
   // wrapping semantics).
-  auto *IntPtr = Builder.CreatePtrToInt(GEP->getPointerOperand(), IntPtrTy);
+  auto *IntPtr = Builder.CreatePtrToInt(Ptr, IntPtrTy);
   auto *ComputedGEP = Builder.CreateAdd(IntPtr, EvaluatedGEP.TotalOffset);
 
-  llvm::SmallVector<std::pair<llvm::Value *, SanitizerMask>, 1> Checks;
-
-  // The GEP is valid if:
-  // 1) The total offset doesn't overflow, and
-  // 2) The sign of the difference between the computed address and the base
-  // pointer matches the sign of the total offset.
-  llvm::Value *ValidGEP;
-  auto *NoOffsetOverflow = Builder.CreateNot(EvaluatedGEP.OffsetOverflows);
-  if (SignedIndices) {
-    // GEP is computed as `unsigned base + signed offset`, therefore:
-    // * If offset was positive, then the computed pointer can not be
-    //   [unsigned] less than the base pointer, unless it overflowed.
-    // * If offset was negative, then the computed pointer can not be
-    //   [unsigned] greater than the bas pointere, unless it overflowed.
-    auto *PosOrZeroValid = Builder.CreateICmpUGE(ComputedGEP, IntPtr);
-    auto *PosOrZeroOffset =
-        Builder.CreateICmpSGE(EvaluatedGEP.TotalOffset, Zero);
-    llvm::Value *NegValid = Builder.CreateICmpULT(ComputedGEP, IntPtr);
-    ValidGEP = Builder.CreateSelect(PosOrZeroOffset, PosOrZeroValid, NegValid);
-  } else if (!IsSubtraction) {
-    // GEP is computed as `unsigned base + unsigned offset`,  therefore the
-    // computed pointer can not be [unsigned] less than base pointer,
-    // unless there was an overflow.
-    // Equivalent to `@llvm.uadd.with.overflow(%base, %offset)`.
-    ValidGEP = Builder.CreateICmpUGE(ComputedGEP, IntPtr);
-  } else {
-    // GEP is computed as `unsigned base - unsigned offset`, therefore the
-    // computed pointer can not be [unsigned] greater than base pointer,
-    // unless there was an overflow.
-    // Equivalent to `@llvm.usub.with.overflow(%base, sub(0, %offset))`.
-    ValidGEP = Builder.CreateICmpULE(ComputedGEP, IntPtr);
-  }
-  ValidGEP = Builder.CreateAnd(ValidGEP, NoOffsetOverflow);
-  Checks.emplace_back(ValidGEP, SanitizerKind::PointerOverflow);
+  llvm::SmallVector<std::pair<llvm::Value *, SanitizerMask>, 2> Checks;
+
+  if (PerformNullCheck) {
+    // In C++, if the base pointer evaluates to a null pointer value,
+    // the only valid  pointer this inbounds GEP can produce is also
+    // a null pointer, so the offset must also evaluate to zero.
+    // Likewise, if we have non-zero base pointer, we can not get null pointer
+    // as a result, so the offset can not be -intptr_t(BasePtr).
+    // In other words, both pointers are either null, or both are non-null,
+    // or the behaviour is undefined.
+    //
+    // C, however, is more strict in this regard, and gives more
+    // optimization opportunities: in C, additionally, nullptr+0 is undefined.
+    // So both the input to the 'gep inbounds' AND the output must not be null.
+    auto *BaseIsNotNullptr = Builder.CreateIsNotNull(Ptr);
+    auto *ResultIsNotNullptr = Builder.CreateIsNotNull(ComputedGEP);
+    auto *Valid =
+        CGM.getLangOpts().CPlusPlus
+            ? Builder.CreateICmpEQ(BaseIsNotNullptr, ResultIsNotNullptr)
+            : Builder.CreateAnd(BaseIsNotNullptr, ResultIsNotNullptr);
+    Checks.emplace_back(Valid, SanitizerKind::PointerOverflow);
+  }
+
+  if (PerformOverflowCheck) {
+    // The GEP is valid if:
+    // 1) The total offset doesn't overflow, and
+    // 2) The sign of the difference between the computed address and the base
+    // pointer matches the sign of the total offset.
+    llvm::Value *ValidGEP;
+    auto *NoOffsetOverflow = Builder.CreateNot(EvaluatedGEP.OffsetOverflows);
+    if (SignedIndices) {
+      // GEP is computed as `unsigned base + signed offset`, therefore:
+      // * If offset was positive, then the computed pointer can not be
+      //   [unsigned] less than the base pointer, unless it overflowed.
+      // * If offset was negative, then the computed pointer can not be
+      //   [unsigned] greater than the bas pointere, unless it overflowed.
+      auto *PosOrZeroValid = Builder.CreateICmpUGE(ComputedGEP, IntPtr);
+      auto *PosOrZeroOffset =
+          Builder.CreateICmpSGE(EvaluatedGEP.TotalOffset, Zero);
+      llvm::Value *NegValid = Builder.CreateICmpULT(ComputedGEP, IntPtr);
+      ValidGEP =
+          Builder.CreateSelect(PosOrZeroOffset, PosOrZeroValid, NegValid);
+    } else if (!IsSubtraction) {
+      // GEP is computed as `unsigned base + unsigned offset`,  therefore the
+      // computed pointer can not be [unsigned] less than base pointer,
+      // unless there was an overflow.
+      // Equivalent to `@llvm.uadd.with.overflow(%base, %offset)`.
+      ValidGEP = Builder.CreateICmpUGE(ComputedGEP, IntPtr);
+    } else {
+      // GEP is computed as `unsigned base - unsigned offset`, therefore the
+      // computed pointer can not be [unsigned] greater than base pointer,
+      // unless there was an overflow.
+      // Equivalent to `@llvm.usub.with.overflow(%base, sub(0, %offset))`.
+      ValidGEP = Builder.CreateICmpULE(ComputedGEP, IntPtr);
+    }
+    ValidGEP = Builder.CreateAnd(ValidGEP, NoOffsetOverflow);
+    Checks.emplace_back(ValidGEP, SanitizerKind::PointerOverflow);
+  }
 
   assert(!Checks.empty() && "Should have produced some checks.");
 
 
@@ -0,0 +1,34 @@
+// RUN: %clang_cc1 -x c -fsanitize=pointer-overflow -fsanitize-recover=pointer-overflow -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_pointer_overflow" --check-prefixes=CHECK,CHECK-NULL-IS-INVALID-PTR
+// RUN: %clang_cc1 -x c -fno-delete-null-pointer-checks -fsanitize=pointer-overflow -fsanitize-recover=pointer-overflow -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_pointer_overflow" --check-prefixes=CHECK,CHECK-NULL-IS-VALID-PTR
+
+// RUN: %clang_cc1 -x c++ -fsanitize=pointer-overflow -fsanitize-recover=pointer-overflow -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_pointer_overflow" --check-prefixes=CHECK,CHECK-NULL-IS-INVALID-PTR
+// RUN: %clang_cc1 -x c++ -fno-delete-null-pointer-checks -fsanitize=pointer-overflow -fsanitize-recover=pointer-overflow -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_pointer_overflow" --check-prefixes=CHECK,CHECK-NULL-IS-VALID-PTR
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// CHECK-LABEL: @baseline
+char *baseline(char *base, unsigned long offset) {
+  // CHECK: call void @__ubsan_handle_pointer_overflow(
+  return base + offset;
+}
+
+// CHECK-LABEL: @blacklist_0
+__attribute__((no_sanitize("undefined"))) char *blacklist_0(char *base, unsigned long offset) {
+  return base + offset;
+}
+
+// CHECK-LABEL: @blacklist_1
+__attribute__((no_sanitize("pointer-overflow"))) char *blacklist_1(char *base, unsigned long offset) {
+  return base + offset;
+}
+
+// CHECK-LABEL: @ignore_non_default_address_space
+__attribute__((address_space(1))) char *ignore_non_default_address_space(__attribute__((address_space(1))) char *base, unsigned long offset) {
+  return base + offset;
+}
+
+#ifdef __cplusplus
+}
+#endif
@@ -0,0 +1,35 @@
+// RUN: %clang_cc1 -x c   -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s
+// RUN: %clang_cc1 -x c   -fsanitize=pointer-overflow -fno-sanitize-recover=pointer-overflow -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s
+
+// RUN: %clang_cc1 -x c++ -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s
+// RUN: %clang_cc1 -x c++ -fsanitize=pointer-overflow -fno-sanitize-recover=pointer-overflow -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s
+
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct S {
+  int x, y;
+};
+
+// CHECK-LABEL: define i64 @{{.*}}get_offset_of_y_naively{{.*}}(
+uintptr_t get_offset_of_y_naively() {
+  // CHECK: [[ENTRY:.*]]:
+  // CHECK-NEXT:   ret i64 ptrtoint (i32* getelementptr (i32, i32* null, i32 1) to i64)
+  // CHECK-NEXT: }
+  return ((uintptr_t)(&(((struct S *)0)->y)));
+}
+
+// CHECK-LABEL: define i64 @{{.*}}get_offset_of_y_via_builtin{{.*}}(
+uintptr_t get_offset_of_y_via_builtin() {
+  // CHECK: [[ENTRY:.*]]:
+  // CHECK-NEXT:   ret i64 4
+  // CHECK-NEXT: }
+  return __builtin_offsetof(struct S, y);
+}
+
+#ifdef __cplusplus
+}
+#endif