diff --git a/clang/include/clang/CIR/Dialect/IR/CIRDataLayout.h b/clang/include/clang/CIR/Dialect/IR/CIRDataLayout.h
index 89a724594081..b192ab701ceb 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIRDataLayout.h
+++ b/clang/include/clang/CIR/Dialect/IR/CIRDataLayout.h
@@ -28,6 +28,8 @@ class StructLayout;
 class CIRDataLayout {
   bool bigEndian = false;
 
+  unsigned defaultGlobalsAddrSpace = 0;
+
   /// Primitive type alignment data. This is sorted by type and bit
   /// width during construction.
   llvm::DataLayout::PrimitiveSpec StructAlignment;
@@ -106,6 +108,10 @@ class CIRDataLayout {
         cir::IntType::get(Ty.getContext(), getPointerTypeSizeInBits(Ty), false);
     return IntTy;
   }
+
+  unsigned getDefaultGlobalsAddressSpace() const {
+    return defaultGlobalsAddrSpace; // in-class initializer is 0
+  }
 };
 
 /// Used to lazily calculate structure layout information for a target machine,
diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td
index dcb433aedaaa..3ac17b110847 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIROps.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td
@@ -2418,6 +2418,7 @@ def GlobalOp : CIR_Op<"global",
                        UnitAttr:$comdat,
                        UnitAttr:$constant,
                        UnitAttr:$dsolocal,
+                       UnitAttr:$static_local,
                        OptionalAttr<I64Attr>:$alignment,
                        OptionalAttr<ASTVarDeclInterface>:$ast,
                        OptionalAttr<StrAttr>:$section,
@@ -2503,7 +2504,9 @@ def GetGlobalOp : CIR_Op<"get_global",
     ```
   }];
 
-  let arguments = (ins FlatSymbolRefAttr:$name, UnitAttr:$tls);
+  let arguments = (ins FlatSymbolRefAttr:$name,
+                                UnitAttr:$tls,
+                                UnitAttr:$static_local);
   let results = (outs Res<CIR_PointerType, "", []>:$addr);
 
   let assemblyFormat = [{
diff --git a/clang/include/clang/CIR/Interfaces/ASTAttrInterfaces.td b/clang/include/clang/CIR/Interfaces/ASTAttrInterfaces.td
index 33f3cffed030..b1d3dbe6c853 100644
--- a/clang/include/clang/CIR/Interfaces/ASTAttrInterfaces.td
+++ b/clang/include/clang/CIR/Interfaces/ASTAttrInterfaces.td
@@ -64,10 +64,42 @@ let cppNamespace = "::cir" in {
             MangleCtx->mangleDynamicInitializer($_attr.getAst(), Out);
           }]
       >,
+      InterfaceMethod<"", "void", "mangleStaticGuardVariable", (ins "llvm::raw_ostream&":$Out), [{}],
+        /*defaultImplementation=*/ [{
+          std::unique_ptr<clang::MangleContext> mangler(
+              $_attr.getAst()->getASTContext().createMangleContext());
+          mangler->mangleStaticGuardVariable($_attr.getAst(), Out);
+        }]
+      >,
       InterfaceMethod<"", "clang::VarDecl::TLSKind", "getTLSKind", (ins), [{}],
         /*defaultImplementation=*/ [{
           return $_attr.getAst()->getTLSKind();
         }]
+      >,
+      InterfaceMethod<"", "bool", "isInline", (ins), [{}],
+        /*defaultImplementation=*/ [{
+          return $_attr.getAst()->isInline();
+        }]
+      >,
+      InterfaceMethod<"", "clang::TemplateSpecializationKind", "getTemplateSpecializationKind", (ins), [{}],
+        /*defaultImplementation=*/ [{
+          return $_attr.getAst()->getTemplateSpecializationKind();
+        }]
+      >,
+      InterfaceMethod<"", "bool", "isLocalVarDecl", (ins), [{}],
+        /*defaultImplementation=*/ [{
+          return $_attr.getAst()->isLocalVarDecl();
+        }]
+      >,
+      InterfaceMethod<"", "clang::SourceLocation", "getLocation", (ins), [{}],
+        /*defaultImplementation=*/ [{
+          return $_attr.getAst()->getLocation();
+        }]
+      >,
+      InterfaceMethod<"", "const clang::VarDecl *", "getRawDecl", (ins), [{}],
+        /*defaultImplementation=*/ [{
+          return $_attr.getAst();
+        }]
       >
     ];
   }
diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h
index 5ab4473bb7ec..8ac8bfc69d6f 100644
--- a/clang/include/clang/CIR/MissingFeatures.h
+++ b/clang/include/clang/CIR/MissingFeatures.h
@@ -118,6 +118,8 @@ struct MissingFeatures {
   static bool setFunctionAttributes() { return false; }
   static bool attributeBuiltin() { return false; }
   static bool attributeNoBuiltin() { return false; }
+  static bool functionIndexAttribute() { return false; }
+  static bool noUnwindAttribute() { return false; }
   static bool parameterAttributes() { return false; }
   static bool minLegalVectorWidthAttr() { return false; }
   static bool vscaleRangeAttr() { return false; }
@@ -151,6 +153,7 @@ struct MissingFeatures {
 
   // Folding methods.
   static bool foldBinOpFMF() { return false; }
+  static bool folder() { return false; }
 
   // Fast math.
   static bool fastMathGuard() { return false; }
@@ -454,6 +457,10 @@ struct MissingFeatures {
   static bool mustProgress() { return false; }
 
   static bool skipTempCopy() { return false; }
+
+  static bool addressSpaceInGlobalVar() { return false; }
+
+  static bool useARMGuardVarABI() { return false; }
 };
 
 } // namespace cir
diff --git a/clang/lib/CIR/CodeGen/Address.h b/clang/lib/CIR/CodeGen/Address.h
index 7643f9b87992..9e1f846ae320 100644
--- a/clang/lib/CIR/CodeGen/Address.h
+++ b/clang/lib/CIR/CodeGen/Address.h
@@ -33,6 +33,10 @@ class Address {
   mlir::Type ElementType;
   clang::CharUnits Alignment;
 
+  /// Offset from the base pointer. This is non-null only when the base pointer
+  /// is signed.
+  mlir::Attribute offset = nullptr;
+
 protected:
   Address(std::nullptr_t) : ElementType(nullptr) {}
 
@@ -134,6 +138,8 @@ class Address {
     return *this;
   }
 
+  bool hasOffset() const { return static_cast<bool>(offset); } // offset is set
+
   /// Get the operation which defines this address.
   mlir::Operation *getDefiningOp() const {
     if (!isValid())
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuilder.h b/clang/lib/CIR/CodeGen/CIRGenBuilder.h
index 28be733f62d7..a2273c3b2668 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuilder.h
+++ b/clang/lib/CIR/CodeGen/CIRGenBuilder.h
@@ -25,6 +25,7 @@
 #include "clang/CIR/Dialect/IR/FPEnv.h"
 
 #include "mlir/IR/Attributes.h"
+#include "mlir/IR/Block.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/IR/BuiltinAttributes.h"
 #include "mlir/IR/BuiltinOps.h"
@@ -817,6 +818,35 @@ class CIRGenBuilderTy : public cir::CIRBaseBuilderTy {
     return Address{createImagPtr(loc, addr.getPointer()), addr.getAlignment()};
   }
 
+  /// Test \p arg for equality against getNullValue() of \p arg's own type.
+  mlir::Value createIsNull(mlir::Location loc, mlir::Value arg,
+                           const llvm::Twine &name = "") {
+    return createICmpEQ(loc, arg, getNullValue(arg.getType(), loc), name);
+  }
+
+  /// Test \p arg for inequality against getNullValue() of \p arg's own type.
+  mlir::Value createIsNotNull(mlir::Location loc, mlir::Value arg,
+                              const llvm::Twine &name = "") {
+    return createICmpNE(loc, arg, getNullValue(arg.getType(), loc), name);
+  }
+
+  mlir::Value createICmpEQ(mlir::Location loc, mlir::Value lhs, mlir::Value rhs,
+                           const llvm::Twine &name = "") {
+    return createICmp(loc, cir::CmpOpKind::eq, lhs, rhs, name); // lhs == rhs
+  }
+  mlir::Value createICmpNE(mlir::Location loc, mlir::Value lhs, mlir::Value rhs,
+                           const llvm::Twine &name = "") {
+    return createICmp(loc, cir::CmpOpKind::ne, lhs, rhs, name); // lhs != rhs
+  }
+
+  mlir::Value createICmp(mlir::Location loc, cir::CmpOpKind kind,
+                         mlir::Value lhs, mlir::Value rhs,
+                         const llvm::Twine &name = "") {
+    if (cir::MissingFeatures::folder()) // marker: folding support is NYI
+      llvm_unreachable("NYI");
+    return createCompare(loc, kind, lhs, rhs); // name is not forwarded
+  }
+
   /// Cast the element type of the given address to a different type,
   /// preserving information like the alignment.
   Address createElementBitCast(mlir::Location loc, Address addr,
diff --git a/clang/lib/CIR/CodeGen/CIRGenCXXABI.h b/clang/lib/CIR/CodeGen/CIRGenCXXABI.h
index 127d59c54892..47129ce4e3d0 100644
--- a/clang/lib/CIR/CodeGen/CIRGenCXXABI.h
+++ b/clang/lib/CIR/CodeGen/CIRGenCXXABI.h
@@ -172,6 +172,19 @@ class CIRGenCXXABI {
                                   bool ForVirtualBase, bool Delegating,
                                   Address This, QualType ThisTy) = 0;
 
+  /*************************** Static local guards ****************************/
+
+  /// Emits the guarded initializer and destructor setup for the given
+  /// variable, given that it couldn't be emitted as a constant.
+  /// If \p performInit is false, the initialization has been folded to a
+  /// constant and should not be performed.
+  ///
+  /// The variable may be:
+  ///   - a static local variable
+  ///   - a static data member of a class template instantiation
+  virtual void emitGuardedInit(CIRGenFunction &cgf, const VarDecl &varDecl,
+                               cir::GlobalOp globalOp, bool performInit) = 0;
+
   /// Emit code to force the execution of a destructor during global
   /// teardown.  The default implementation of this uses atexit.
   ///
diff --git a/clang/lib/CIR/CodeGen/CIRGenCall.cpp b/clang/lib/CIR/CodeGen/CIRGenCall.cpp
index 6e31e2f41311..0c02a5b2ce95 100644
--- a/clang/lib/CIR/CodeGen/CIRGenCall.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenCall.cpp
@@ -11,6 +11,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "CIRGenCall.h"
 #include "CIRGenBuilder.h"
 #include "CIRGenCXXABI.h"
 #include "CIRGenFunction.h"
@@ -19,8 +20,10 @@
 #include "TargetInfo.h"
 
 #include "clang/AST/Attr.h"
+#include "clang/AST/Attrs.inc"
 #include "clang/AST/DeclCXX.h"
 #include "clang/AST/GlobalDecl.h"
+#include "clang/CIR/ABIArgInfo.h"
 #include "clang/CIR/Dialect/IR/CIRDialect.h"
 #include "clang/CIR/Dialect/IR/CIRTypes.h"
 #include "clang/CIR/FnInfoOpts.h"
@@ -31,6 +34,7 @@
 #include "mlir/IR/Builders.h"
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/IR/BuiltinTypes.h"
+#include "mlir/IR/Location.h"
 #include "mlir/IR/SymbolTable.h"
 #include "mlir/IR/Types.h"
 #include "clang/CIR/MissingFeatures.h"
diff --git a/clang/lib/CIR/CodeGen/CIRGenDecl.cpp b/clang/lib/CIR/CodeGen/CIRGenDecl.cpp
index c4d53a8477ec..cdc1fde1348f 100644
--- a/clang/lib/CIR/CodeGen/CIRGenDecl.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenDecl.cpp
@@ -539,70 +539,98 @@ CIRGenModule::getOrCreateStaticVarDecl(const VarDecl &D,
 /// Add the initializer for 'D' to the global variable that has already been
 /// created for it. If the initializer has a different type than GV does, this
 /// may free GV and return a different one. Otherwise it just returns GV.
-cir::GlobalOp CIRGenFunction::addInitializerToStaticVarDecl(
-    const VarDecl &D, cir::GlobalOp GV, cir::GetGlobalOp GVAddr) {
+cir::GlobalOp
+CIRGenFunction::addInitializerToStaticVarDecl(const VarDecl &varDecl,
+                                              cir::GlobalOp globalOp,
+                                              cir::GetGlobalOp getGlobalOp) {
   ConstantEmitter emitter(*this);
-  mlir::TypedAttr Init =
-      mlir::dyn_cast<mlir::TypedAttr>(emitter.tryEmitForInitializer(D));
-  assert(Init && "Expected typed attribute");
+  mlir::Attribute init = emitter.tryEmitForInitializer(varDecl);
 
   // If constant emission failed, then this should be a C++ static
   // initializer.
-  if (!Init) {
+  if (!init) {
     if (!getLangOpts().CPlusPlus)
-      CGM.ErrorUnsupported(D.getInit(), "constant l-value expression");
-    else if (D.hasFlexibleArrayInit(getContext()))
-      CGM.ErrorUnsupported(D.getInit(), "flexible array initializer");
+      CGM.ErrorUnsupported(varDecl.getInit(), "constant l-value expression");
+    else if (varDecl.hasFlexibleArrayInit(getContext()))
+      CGM.ErrorUnsupported(varDecl.getInit(), "flexible array initializer");
     else {
       // Since we have a static initializer, this global variable can't
       // be constant.
-      GV.setConstant(false);
-      llvm_unreachable("C++ guarded init it NYI");
+      globalOp.setConstant(false);
+
+      emitCXXGuardedInit(varDecl, globalOp, /*performInit*/ true);
+      getGlobalOp.setStaticLocal(true);
     }
-    return GV;
+    return globalOp;
   }
 
+  auto typedInit = mlir::cast<mlir::TypedAttr>(init);
+
 #ifndef NDEBUG
-  CharUnits VarSize = CGM.getASTContext().getTypeSizeInChars(D.getType()) +
-                      D.getFlexibleArrayInitChars(getContext());
-  CharUnits CstSize = CharUnits::fromQuantity(
-      CGM.getDataLayout().getTypeAllocSize(Init.getType()));
-  assert(VarSize == CstSize && "Emitted constant has unexpected size");
+  CharUnits varSize =
+      CGM.getASTContext().getTypeSizeInChars(varDecl.getType()) +
+      varDecl.getFlexibleArrayInitChars(getContext());
+  CharUnits cstSize = CharUnits::fromQuantity(
+      CGM.getDataLayout().getTypeAllocSize(typedInit.getType()));
+  assert(varSize == cstSize && "Emitted constant has unexpected size");
 #endif
 
   // The initializer may differ in type from the global. Rewrite
   // the global to match the initializer.  (We have to do this
   // because some types, like unions, can't be completely represented
   // in the LLVM type system.)
-  if (GV.getSymType() != Init.getType()) {
-    GV.setSymType(Init.getType());
+  // NOTE(CIR): This was removed in OG since opaque pointers made it trivial. We
+  // need it since we still have typed pointers.
+  if (globalOp.getSymType() != typedInit.getType()) {
+    globalOp.setSymType(typedInit.getType());
+
+    cir::GlobalOp oldGlobalOp = globalOp;
+    globalOp =
+        builder.createGlobal(CGM.getModule(), getLoc(varDecl.getSourceRange()),
+                             oldGlobalOp.getName(), typedInit.getType(),
+                             oldGlobalOp.getConstant(), globalOp.getLinkage());
+    // FIXME(cir): OG codegen inserts new GV before old one, we probably don't
+    // need that?
+    globalOp.setVisibility(oldGlobalOp.getVisibility());
+    globalOp.setGlobalVisibilityAttr(oldGlobalOp.getGlobalVisibilityAttr());
+    globalOp.setInitialValueAttr(init);
+    globalOp.setTlsModelAttr(oldGlobalOp.getTlsModelAttr());
+    globalOp.setDSOLocal(oldGlobalOp.getDsolocal());
+    assert(!cir::MissingFeatures::setComdat());
+    assert(!cir::MissingFeatures::addressSpaceInGlobalVar());
 
     // Normally this should be done with a call to CGM.replaceGlobal(OldGV, GV),
     // but since at this point the current block hasn't been really attached,
     // there's no visibility into the GetGlobalOp corresponding to this Global.
     // Given those constraints, thread in the GetGlobalOp and update it
     // directly.
-    GVAddr.getAddr().setType(
-        getBuilder().getPointerTo(Init.getType(), GV.getAddrSpaceAttr()));
+    getGlobalOp.getAddr().setType(getBuilder().getPointerTo(
+        typedInit.getType(), globalOp.getAddrSpaceAttr()));
+
+    // Replace all uses of the old global with the new global
+    oldGlobalOp->replaceAllUsesWith(globalOp);
+
+    // Erase the old global, since it is no longer used.
+    oldGlobalOp->erase();
   }
 
-  bool NeedsDtor =
-      D.needsDestruction(getContext()) == QualType::DK_cxx_destructor;
+  bool needsDtor =
+      varDecl.needsDestruction(getContext()) == QualType::DK_cxx_destructor;
 
-  GV.setConstant(
-      CGM.isTypeConstant(D.getType(), /*ExcludeCtor=*/true, !NeedsDtor));
-  GV.setInitialValueAttr(Init);
+  globalOp.setConstant(
+      CGM.isTypeConstant(varDecl.getType(), /*ExcludeCtor=*/true, !needsDtor));
+  globalOp.setInitialValueAttr(init);
 
-  emitter.finalize(GV);
+  emitter.finalize(globalOp);
 
-  if (NeedsDtor) {
+  if (needsDtor) {
     // We have a constant initializer, but a nontrivial destructor. We still
     // need to perform a guarded "initialization" in order to register the
     // destructor.
     llvm_unreachable("C++ guarded init is NYI");
   }
 
-  return GV;
+  return globalOp;
 }
 
 void CIRGenFunction::emitStaticVarDecl(const VarDecl &D,
diff --git a/clang/lib/CIR/CodeGen/CIRGenDeclCXX.cpp b/clang/lib/CIR/CodeGen/CIRGenDeclCXX.cpp
index 0b9fa80536de..430550979aa3 100644
--- a/clang/lib/CIR/CodeGen/CIRGenDeclCXX.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenDeclCXX.cpp
@@ -10,6 +10,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "CIRGenCXXABI.h"
 #include "CIRGenFunction.h"
 #include "CIRGenModule.h"
 #include "TargetInfo.h"
@@ -51,3 +52,25 @@ void CIRGenModule::emitCXXGlobalVarDeclInitFunc(const VarDecl *D,
 
   emitCXXGlobalVarDeclInit(D, Addr, PerformInit);
 }
+
+void CIRGenFunction::emitCXXGuardedInit(const VarDecl &varDecl,
+                                        cir::GlobalOp globalOp,
+                                        bool performInit) {
+  // Guard variables can be forbidden via CodeGenOpts.ForbidGuardVariables.
+  // Reporting that as a diagnostic is not implemented yet (NYI), so this
+  // path currently hits llvm_unreachable instead of emitting an error.
+  if (CGM.getCodeGenOpts().ForbidGuardVariables)
+    llvm_unreachable("NYI");
+
+  CGM.getCXXABI().emitGuardedInit(*this, varDecl, globalOp, performInit);
+}
+
+void CIRGenFunction::emitCXXGlobalVarDeclInit(const VarDecl &varDecl,
+                                              cir::GlobalOp globalOp,
+                                              bool performInit) {
+  // TODO(CIR): Unlike CodeGen, the real implementation currently lives in
+  // CIRGenModule because of how global initializers are constructed at the
+  // moment. Once LoweringPrepare is moved into CIRGen, refactor it to live
+  // here; until then this function is NYI and must not be reached.
+  llvm_unreachable("NYI");
+}
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
index cd7763f2ef79..6f2ba4248802 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
@@ -419,7 +419,7 @@ void CIRGenFunction::LexicalScope::cleanup() {
   // An empty non-entry block has nothing to offer, and since this is
   // synthetic, losing information does not affect anything.
   bool entryBlock = builder.getInsertionBlock()->isEntryBlock();
-  if (!entryBlock && currBlock->empty()) {
+  if (!entryBlock && currBlock->empty() && currBlock->hasNoPredecessors()) {
     currBlock->erase();
     // Remove unused cleanup blocks.
     if (cleanupBlock && cleanupBlock->hasNoPredecessors())
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h
index b6b949e47bf2..bc1a6f2b0990 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.h
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h
@@ -28,6 +28,7 @@
 #include "clang/AST/ExprCXX.h"
 #include "clang/AST/Type.h"
 #include "clang/Basic/ABI.h"
+#include "clang/Basic/SourceLocation.h"
 #include "clang/Basic/TargetInfo.h"
 #include "clang/CIR/TypeEvaluationKind.h"
 
@@ -920,6 +921,15 @@ class CIRGenFunction : public CIRGenTypeCache {
 
   LValue emitPointerToDataMemberBinaryExpr(const BinaryOperator *E);
 
+  /// Emit code in this function to perform a guarded variable initialization.
+  /// Guarded initializations are used when it's not possible to prove that
+  /// initialization will be done exactly once, e.g. with a static local
+  /// variable or a static data member of a class template.
+  void emitCXXGuardedInit(const VarDecl &varDecl, cir::GlobalOp globalOp,
+                          bool performInit);
+
+  enum class GuardKind { variableGuard, tlsGuard };
+
   /// TODO: Add TBAAAccessInfo
   Address emitCXXMemberDataPointerAddress(
       const Expr *E, Address base, mlir::Value memberPtr,
@@ -960,8 +970,14 @@ class CIRGenFunction : public CIRGenTypeCache {
   mlir::Value emitRuntimeCall(mlir::Location loc, cir::FuncOp callee,
                               llvm::ArrayRef<mlir::Value> args = {});
 
+  /// Emit an invariant.start call for the given memory region.
   void emitInvariantStart(CharUnits Size);
 
+  /// emitCXXGlobalVarDeclInit - Create the initializer for a C++ variable with
+  /// global storage.
+  void emitCXXGlobalVarDeclInit(const VarDecl &varDecl, cir::GlobalOp globalOp,
+                                bool performInit);
+
   /// Create a check for a function parameter that may potentially be
   /// declared as non-null.
   void emitNonNullArgCheck(RValue RV, QualType ArgType, SourceLocation ArgLoc,
@@ -1440,9 +1456,9 @@ class CIRGenFunction : public CIRGenTypeCache {
   /// inside a function, including static vars etc.
   void emitVarDecl(const clang::VarDecl &D);
 
-  cir::GlobalOp addInitializerToStaticVarDecl(const VarDecl &D,
-                                              cir::GlobalOp GV,
-                                              cir::GetGlobalOp GVAddr);
+  cir::GlobalOp addInitializerToStaticVarDecl(const VarDecl &varDecl,
+                                              cir::GlobalOp globalOp,
+                                              cir::GetGlobalOp getGlobalOp);
 
   void emitStaticVarDecl(const VarDecl &D, cir::GlobalLinkageKind Linkage);
 
diff --git a/clang/lib/CIR/CodeGen/CIRGenItaniumCXXABI.cpp b/clang/lib/CIR/CodeGen/CIRGenItaniumCXXABI.cpp
index ea5cd755ec26..2a6d6ebbd028 100644
--- a/clang/lib/CIR/CodeGen/CIRGenItaniumCXXABI.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenItaniumCXXABI.cpp
@@ -17,17 +17,27 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "CIRGenBuilder.h"
 #include "CIRGenCXXABI.h"
 #include "CIRGenCleanup.h"
+#include "CIRGenFunction.h"
 #include "CIRGenFunctionInfo.h"
+#include "CIRGenModule.h"
 #include "ConstantInitBuilder.h"
+#include "mlir/IR/Block.h"
+#include "mlir/IR/BuiltinAttributes.h"
 
 #include "clang/AST/GlobalDecl.h"
 #include "clang/AST/Mangle.h"
 #include "clang/AST/VTableBuilder.h"
 #include "clang/Basic/Linkage.h"
+#include "clang/Basic/Specifiers.h"
 #include "clang/Basic/TargetInfo.h"
 #include "clang/CIR/Dialect/IR/CIRAttrs.h"
+#include "clang/CIR/Dialect/IR/CIRDialect.h"
+#include "clang/CIR/Dialect/IR/CIROpsEnums.h"
+#include "clang/CIR/Dialect/IR/CIRTypes.h"
+#include "clang/CIR/MissingFeatures.h"
 #include "llvm/Support/ErrorHandling.h"
 
 using namespace clang;
@@ -185,6 +195,8 @@ class CIRGenItaniumCXXABI : public CIRGenCXXABI {
                           CXXDtorType Type, bool ForVirtualBase,
                           bool Delegating, Address This,
                           QualType ThisTy) override;
+  void emitGuardedInit(CIRGenFunction &cgf, const VarDecl &varDecl,
+                       cir::GlobalOp globalOp, bool performInit) override;
   void registerGlobalDtor(CIRGenFunction &CGF, const VarDecl *D,
                           cir::FuncOp dtor, mlir::Value Addr) override;
   virtual void emitRethrow(CIRGenFunction &CGF, bool isNoReturn) override;
@@ -2635,3 +2647,19 @@ CIRGenItaniumCXXABI::buildVirtualMethodAttr(cir::MethodType MethodTy,
 bool CIRGenItaniumCXXABI::isZeroInitializable(const MemberPointerType *MPT) {
   return MPT->isMemberFunctionPointer();
 }
+
+/// Emit the initializer for \p varDecl through CIRGenModule and flag
+/// \p globalOp as static_local; no guard variable is emitted here.
+void CIRGenItaniumCXXABI::emitGuardedInit(CIRGenFunction &cgf,
+                                          const VarDecl &varDecl,
+                                          cir::GlobalOp globalOp,
+                                          bool performInit) {
+
+  // Emit the initializer and add a global destructor if appropriate.
+  cgf.CGM.emitCXXGlobalVarDeclInit(&varDecl, globalOp, performInit);
+
+  // CIR diverges from IRGen here: the init goes into the ctor region and the
+  // global is only marked as static local. The guard/acquire sequence itself
+  // is emitted later, during LoweringPrepare.
+  globalOp.setStaticLocal(true);
+}
diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.h b/clang/lib/CIR/CodeGen/CIRGenModule.h
index 9d7c1eb572a6..e55cdd30e45d 100644
--- a/clang/lib/CIR/CodeGen/CIRGenModule.h
+++ b/clang/lib/CIR/CodeGen/CIRGenModule.h
@@ -160,6 +160,7 @@ class CIRGenModule : public CIRGenTypeCache {
   CIRGenTypes &getTypes() { return genTypes; }
   const clang::LangOptions &getLangOpts() const { return langOpts; }
   CIRGenFunction *getCurrCIRGenFun() const { return CurCGF; }
+  void setCurrentCIRGenFn(CIRGenFunction *cgf) { CurCGF = cgf; }
   const cir::CIRDataLayout getDataLayout() const {
     // FIXME(cir): instead of creating a CIRDataLayout every time, set it as an
     // attribute for the CIRModule class.
diff --git a/clang/lib/CIR/CodeGen/LoweringPrepare.cpp b/clang/lib/CIR/CodeGen/LoweringPrepare.cpp
index 7d8dad7b82d3..7d3f7f1f0200 100644
--- a/clang/lib/CIR/CodeGen/LoweringPrepare.cpp
+++ b/clang/lib/CIR/CodeGen/LoweringPrepare.cpp
@@ -6,7 +6,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "CIRGenModule.h"
+#include "Address.h"
+#include "CIRGenBuilder.h"
 
 #include "mlir/IR/BuiltinAttributes.h"
 #include "mlir/IR/Region.h"
@@ -22,6 +23,7 @@
 #include "clang/CIR/Dialect/Transforms/LoweringPrepareCXXABI.h"
 #include "clang/CIR/Dialect/Transforms/PassDetail.h"
 #include "clang/CIR/Interfaces/ASTAttrInterfaces.h"
+#include "clang/CIR/MissingFeatures.h"
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringMap.h"
@@ -80,6 +82,7 @@ struct LoweringPreparePass : public LoweringPrepareBase<LoweringPreparePass> {
   void lowerThreeWayCmpOp(CmpThreeWayOp op);
   void lowerVAArgOp(VAArgOp op);
   void lowerGlobalOp(GlobalOp op);
+  void lowerGetGlobalOp(GetGlobalOp op);
   void lowerDynamicCastOp(DynamicCastOp op);
   void lowerStdFindOp(StdFindOp op);
   void lowerIterBeginOp(IterBeginOp op);
@@ -88,6 +91,9 @@ struct LoweringPreparePass : public LoweringPrepareBase<LoweringPreparePass> {
   void lowerArrayDtor(ArrayDtor op);
   void lowerArrayCtor(ArrayCtor op);
 
+  void handleStaticLocal(GlobalOp globalOp, GetGlobalOp getGlobalOp);
+  void handleGlobalOpCtorDtor(GlobalOp globalOp);
+
   /// Collect annotations of global values in the module
   void addGlobalAnnotations(mlir::Operation *op, mlir::ArrayAttr annotations);
 
@@ -103,6 +109,16 @@ struct LoweringPreparePass : public LoweringPrepareBase<LoweringPreparePass> {
   /// Build attribute of global annotation values
   void buildGlobalAnnotationValues();
 
+  cir::GlobalOp
+  getStaticLocalDeclGuardAddress(cir::ASTVarDeclInterface varDecl) const {
+    return staticLocalDeclGuardMap.lookup(varDecl); // null if absent; no insert
+  }
+
+  void setStaticLocalDeclGuardAddress(cir::ASTVarDeclInterface varDecl,
+                                      cir::GlobalOp globalOp) {
+    staticLocalDeclGuardMap[varDecl] = globalOp; // replaces any prior entry
+  }
+
   FuncOp buildRuntimeFunction(
       mlir::OpBuilder &builder, llvm::StringRef name, mlir::Location loc,
       cir::FuncType type,
@@ -174,6 +190,9 @@ struct LoweringPreparePass : public LoweringPrepareBase<LoweringPreparePass> {
   llvm::SmallVector<mlir::Attribute, 4> globalDtorList;
   /// List of annotations in the module
   llvm::SmallVector<mlir::Attribute, 4> globalAnnotations;
+
+  llvm::DenseMap<cir::ASTVarDeclInterface, cir::GlobalOp>
+      staticLocalDeclGuardMap;
 };
 } // namespace
 
@@ -358,7 +377,6 @@ void LoweringPreparePass::lowerVAArgOp(VAArgOp op) {
     op.replaceAllUsesWith(res);
     op.erase();
   }
-  return;
 }
 
 void LoweringPreparePass::lowerUnaryOp(UnaryOp op) {
@@ -867,14 +885,14 @@ void LoweringPreparePass::lowerThreeWayCmpOp(CmpThreeWayOp op) {
   op.erase();
 }
 
-void LoweringPreparePass::lowerGlobalOp(GlobalOp op) {
-  auto &ctorRegion = op.getCtorRegion();
-  auto &dtorRegion = op.getDtorRegion();
+void LoweringPreparePass::handleGlobalOpCtorDtor(GlobalOp globalOp) {
+  auto &ctorRegion = globalOp.getCtorRegion();
+  auto &dtorRegion = globalOp.getDtorRegion();
 
   if (!ctorRegion.empty() || !dtorRegion.empty()) {
     // Build a variable initialization function and move the initialzation code
     // in the ctor region over.
-    auto f = buildCXXGlobalVarDeclInitFunc(op);
+    auto f = buildCXXGlobalVarDeclInitFunc(globalOp);
 
     // Clear the ctor and dtor region
     ctorRegion.getBlocks().clear();
@@ -882,15 +900,522 @@ void LoweringPreparePass::lowerGlobalOp(GlobalOp op) {
 
     // Add a function call to the variable initialization function.
     assert(!hasAttr<clang::InitPriorityAttr>(
-               mlir::cast<ASTDeclInterface>(*op.getAst())) &&
+               mlir::cast<ASTDeclInterface>(*globalOp.getAst())) &&
            "custom initialization priority NYI");
     dynamicInitializers.push_back(f);
   }
 
-  std::optional<mlir::ArrayAttr> annotations = op.getAnnotations();
-  if (annotations) {
-    addGlobalAnnotations(op, annotations.value());
+  std::optional<mlir::ArrayAttr> annotations = globalOp.getAnnotations();
+  if (annotations)
+    addGlobalAnnotations(globalOp, annotations.value());
+}
+
+/// Lower a `cir.get_global`. Only references flagged as static locals need
+/// work: the global they name is looked up in the module and handed, together
+/// with the reference itself, to handleStaticLocal.
+void LoweringPreparePass::lowerGetGlobalOp(GetGlobalOp getGlobalOp) {
+  if (getGlobalOp.getStaticLocal()) {
+    mlir::Operation *referenced =
+        mlir::SymbolTable::lookupSymbolIn(theModule, getGlobalOp.getName());
+    handleStaticLocal(mlir::cast<cir::GlobalOp>(referenced), getGlobalOp);
+  }
+}
+
+/// Lower a `cir.global`. Globals flagged as static locals are left alone here;
+/// every other global goes through the ctor/dtor handling.
+void LoweringPreparePass::lowerGlobalOp(GlobalOp globalOp) {
+  if (globalOp.getStaticLocal())
+    return;
+
+  handleGlobalOpCtorDtor(globalOp);
+}
+
+/// Create a global named \p name of type \p type with private symbol
+/// visibility, inserted immediately before \p curFn. The builder's insertion
+/// point is restored before returning.
+static cir::GlobalOp createGuardGlobalOp(::cir::CIRBaseBuilderTy &builder,
+                                         mlir::Location loc, StringRef name,
+                                         mlir::Type type, bool isConstant,
+                                         cir::AddressSpaceAttr addrSpace,
+                                         cir::GlobalLinkageKind linkage,
+                                         cir::FuncOp curFn) {
+  // Puts the caller's insertion point back when this function returns.
+  mlir::OpBuilder::InsertionGuard restoreInsertionPoint(builder);
+
+  // The guard variable goes right in front of the function requiring it.
+  builder.setInsertionPoint(curFn);
+  auto guardVar = builder.create<cir::GlobalOp>(loc, name, type, isConstant,
+                                                linkage, addrSpace);
+
+  // MLIR doesn't allow public declarations, so start out private until the
+  // visibility can be judged based on the initializer.
+  mlir::SymbolTable::setSymbolVisibility(
+      guardVar, mlir::SymbolTable::Visibility::Private);
+  return guardVar;
+}
+
+/// Look up the symbol called \p name in \p theModule. Yields null when the
+/// module defines no such symbol.
+static mlir::Operation *getGlobalValue(mlir::ModuleOp theModule,
+                                       StringRef name) {
+  return mlir::SymbolTable::lookupSymbolIn(theModule, name);
+}
+
+/// Create a declaration of a function named \p name with type \p type at the
+/// very beginning of \p theModule. The declaration gets external linkage,
+/// private symbol visibility and an empty dictionary of extra attributes. The
+/// builder's insertion point is restored before returning.
+static cir::FuncOp createCIRFunction(clang::CIRGen::CIRGenBuilderTy &builder,
+                                     mlir::MLIRContext &mlirContext,
+                                     mlir::ModuleOp theModule,
+                                     mlir::Location loc, StringRef name,
+                                     cir::FuncType type) {
+  // At the point we need to create the function, the insertion point
+  // could be anywhere (e.g. callsite). Do not rely on whatever it might
+  // be, properly save, find the appropriate place and restore.
+  mlir::OpBuilder::InsertionGuard guard(builder);
+
+  // Insert at the start of the module body. This is the same position as
+  // "before the first operation of the module", but it is also well-defined
+  // when the module body is empty.
+  builder.setInsertionPointToStart(theModule.getBody());
+
+  FuncOp f = builder.create<cir::FuncOp>(loc, name, type);
+
+  assert(f.isDeclaration() && "expected empty body");
+
+  // A declaration gets private visibility by default, but external linkage
+  // as the default linkage.
+  f.setLinkageAttr(cir::GlobalLinkageKindAttr::get(
+      &mlirContext, cir::GlobalLinkageKind::ExternalLinkage));
+  mlir::SymbolTable::setSymbolVisibility(
+      f, mlir::SymbolTable::Visibility::Private);
+
+  // Initialize with empty dict of extra attributes.
+  f.setExtraAttrsAttr(cir::ExtraFuncAttributesAttr::get(
+      &mlirContext, builder.getDictionaryAttr({})));
+  return f;
+}
+
+/// Return the function named \p mangledName with type \p type, creating a
+/// declaration for it when needed.
+///
+/// If the module already has a function with that name and exactly that type,
+/// it is returned as is. Otherwise a new declaration is created and, if an
+/// entry with a different type existed, that old entry is erased.
+///
+/// \p type must currently be a cir::FuncType; anything else is NYI. Symbols
+/// that are not cir::FuncOp are NYI as well.
+static cir::FuncOp getOrCreateCIRFunctionForRuntimeFunction(
+    clang::ASTContext &astContext, mlir::MLIRContext &mlirContext,
+    mlir::ModuleOp theModule, clang::CIRGen::CIRGenBuilderTy &builder,
+    StringRef mangledName, mlir::Type type) {
+  // Lookup the entry, lazily creating it if necessary.
+  mlir::Operation *entry = getGlobalValue(theModule, mangledName);
+  if (entry) {
+    assert(isa<cir::FuncOp>(entry) &&
+           "not implemented, only supports FuncOp for now");
+
+    // Reuse the existing function when its type already matches. A mismatch
+    // is not diagnosed here; it falls through to the replacement logic below.
+    auto fn = cast<cir::FuncOp>(entry);
+
+    if (fn && fn.getFunctionType() == type) {
+      return fn;
+    }
+  }
+
+  // This function doesn't have a complete type (for example, the return type is
+  // an incomplete struct). Use a fake type instead, and make sure not to try to
+  // set attributes.
+  bool isIncompleteFunction = false;
+
+  cir::FuncType fTy;
+  if (mlir::isa<cir::FuncType>(type)) {
+    fTy = mlir::cast<cir::FuncType>(type);
+  } else {
+    assert(false && "NYI");
+    // FTy = mlir::FunctionType::get(VoidTy, false);
+    isIncompleteFunction = true;
+  }
+
+  // TODO: CodeGen includes the linkage (ExternalLinkage) and only passes the
+  // mangled name if Entry is nullptr.
+  auto func = createCIRFunction(builder, mlirContext, theModule,
+                                theModule.getLoc(), mangledName, fTy);
+
+  // If a function with the same mangled name (but a different type) already
+  // existed, the new declaration takes its place and the old one is erased
+  // right here.
+  //
+  // This happens if there is a prototype for a function (e.g. "int f()") and
+  // then a definition of a different type (e.g. "int f(int x)").
+  if (entry) {
+    // Only checks that the entry is a symbol-defining operation.
+    // NOTE(review): symbolOp is read only by the assert below.
+    auto symbolOp = dyn_cast<mlir::SymbolOpInterface>(entry);
+    assert(symbolOp && "Expected a symbol-defining operation");
+
+    // TODO(cir): When can this symbol be something other than a function?
+    assert(isa<cir::FuncOp>(entry) && "NYI");
+
+    // Obliterate no-proto declaration.
+    entry->erase();
   }
+
+  if (!isIncompleteFunction) {
+    assert(func.getFunctionType() == type);
+    return func;
+  }
+
+  // TODO(cir): Might need bitcast to different address space.
+  assert(!cir::MissingFeatures::addressSpace());
+  return func;
+}
+
+/// Get or create the declaration of the runtime function \p name with type
+/// \p type. Passing true for \p local or \p assumeConvergent is not
+/// implemented yet.
+static cir::FuncOp createRuntimeFunction(
+    clang::ASTContext &astContext, mlir::MLIRContext &mlirContext,
+    mlir::ModuleOp theModule, clang::CIRGen::CIRGenBuilderTy &builder,
+    cir::FuncType type, StringRef name, mlir::ArrayAttr = {},
+    [[maybe_unused]] bool local = false, bool assumeConvergent = false) {
+  if (assumeConvergent || local)
+    llvm_unreachable("NYI");
+
+  cir::FuncOp runtimeFn = getOrCreateCIRFunctionForRuntimeFunction(
+      astContext, mlirContext, theModule, builder, name, type);
+
+  // Traditional codegen checks for a valid dyn_cast llvm::Function here; no
+  // testcase covers this path just yet. Handling it would mean setting up the
+  // runtime CC, DLL support for windows and dso local.
+  if (!runtimeFn)
+    llvm_unreachable("NYI");
+
+  return runtimeFn;
+}
+
+/// Get or create the declaration of the runtime function
+///   void __cxa_guard_abort(__guard *guard_object);
+/// using \p guardPtrTy as the type of its single parameter.
+static cir::FuncOp getGuardAbortFn(clang::ASTContext &astContext,
+                                   mlir::MLIRContext &mlirContext,
+                                   mlir::ModuleOp theModule,
+                                   clang::CIRGen::CIRGenBuilderTy &builder,
+                                   cir::PointerType guardPtrTy) {
+  mlir::Type resultTy = builder.getVoidTy();
+  cir::FuncType abortFnTy =
+      builder.getFuncType(guardPtrTy, resultTy, /*isVarArg=*/false);
+  assert(!cir::MissingFeatures::functionIndexAttribute());
+  assert(!cir::MissingFeatures::noUnwindAttribute());
+  return createRuntimeFunction(astContext, mlirContext, theModule, builder,
+                               abortFnTy, "__cxa_guard_abort");
+}
+
+/// Get or create the declaration of the runtime function
+///   int __cxa_guard_acquire(__guard *guard_object);
+/// using \p guardPtrTy as the type of its single parameter.
+static cir::FuncOp getGuardAcquireFn(clang::ASTContext &astContext,
+                                     mlir::MLIRContext &mlirContext,
+                                     mlir::ModuleOp theModule,
+                                     clang::CIRGen::CIRGenBuilderTy &builder,
+                                     cir::PointerType guardPtrTy) {
+  // TODO(CIR): The hardcoded getSInt32Ty is wrong here. CodeGen uses
+  // CodeGenTypes.convertType but we don't have access to the CGM.
+  mlir::Type resultTy = builder.getSInt32Ty();
+  cir::FuncType acquireFnTy =
+      builder.getFuncType(guardPtrTy, resultTy, /*isVarArg=*/false);
+  assert(!cir::MissingFeatures::functionIndexAttribute());
+  assert(!cir::MissingFeatures::noUnwindAttribute());
+  return createRuntimeFunction(astContext, mlirContext, theModule, builder,
+                               acquireFnTy, "__cxa_guard_acquire");
+}
+
+/// Get or create the declaration of the runtime function
+///   void __cxa_guard_release(__guard *guard_object);
+/// using \p guardPtrTy as the type of its single parameter.
+static cir::FuncOp getGuardReleaseFn(clang::ASTContext &astContext,
+                                     mlir::MLIRContext &mlirContext,
+                                     mlir::ModuleOp theModule,
+                                     clang::CIRGen::CIRGenBuilderTy &builder,
+                                     cir::PointerType guardPtrTy) {
+  mlir::Type resultTy = builder.getVoidTy();
+  cir::FuncType releaseFnTy =
+      builder.getFuncType(guardPtrTy, resultTy, /*isVarArg=*/false);
+  assert(!cir::MissingFeatures::functionIndexAttribute());
+  assert(!cir::MissingFeatures::noUnwindAttribute());
+  return createRuntimeFunction(astContext, mlirContext, theModule, builder,
+                               releaseFnTy, "__cxa_guard_release");
+}
+
+/// Emit a call to the runtime function \p callee with \p args at \p loc.
+/// Yields the call's single result, or a null value when the callee produces
+/// no result.
+static mlir::Value emitRuntimeCall(clang::CIRGen::CIRGenBuilderTy &builder,
+                                   mlir::Location loc, cir::FuncOp callee,
+                                   ArrayRef<mlir::Value> args) {
+  // TODO(cir): set the calling convention to this runtime call.
+  assert(!cir::MissingFeatures::setCallingConv());
+
+  auto runtimeCall = builder.createCallOp(loc, callee, args);
+  assert(runtimeCall->getNumResults() <= 1 &&
+         "runtime functions have at most 1 result");
+
+  if (runtimeCall->getNumResults() != 0)
+    return runtimeCall->getResult(0);
+
+  return nullptr;
+}
+
+/// Emit a call to the runtime function \p callee that is not expected to
+/// unwind. Currently this simply forwards to emitRuntimeCall.
+static mlir::Value
+emitNounwindRuntimeCall(clang::CIRGen::CIRGenBuilderTy &builder,
+                        mlir::Location loc, cir::FuncOp callee,
+                        ArrayRef<mlir::Value> args) {
+  mlir::Value result = emitRuntimeCall(builder, loc, callee, args);
+  // Tagging the call as nounwind is not implemented yet.
+  assert(!cir::MissingFeatures::noUnwindAttribute());
+  return result;
+}
+
+/// Expand a reference to a guarded static local variable.
+///
+/// \p getGlobalOp is the `cir.get_global` flagged as static_local and
+/// \p globalOp is the global it names. The guard variable for the declaration
+/// is created (or reused if already recorded), and the guarded initialization
+/// sequence is emitted right after \p getGlobalOp, moving the initializer out
+/// of \p globalOp's ctor region.
+void LoweringPreparePass::handleStaticLocal(GlobalOp globalOp,
+                                            GetGlobalOp getGlobalOp) {
+
+  std::optional<cir::ASTVarDeclInterface> astOption = globalOp.getAst();
+  assert(astOption.has_value());
+  cir::ASTVarDeclInterface varDecl = astOption.value();
+
+  builder->setInsertionPointAfter(getGlobalOp);
+  Block *getGlobalOpBlock = builder->getInsertionBlock();
+  // TODO(CIR): This is too simple at the moment. This is only tested on a
+  // simple test case with only the static local var decl and thus we only have
+  // the return. For less trivial examples we'll have to handle shuffling the
+  // contents of this block more carefully.
+  Operation *ret = getGlobalOpBlock->getTerminator();
+  ret->remove();
+  builder->setInsertionPointAfter(getGlobalOp);
+
+  // Inline variables that weren't instantiated from variable templates have
+  // partially-ordered initialization within their translation unit.
+  bool nonTemplateInline =
+      varDecl.isInline() &&
+      !isTemplateInstantiation(varDecl.getTemplateSpecializationKind());
+
+  // We only need to use thread-safe statics for local non-TLS variables and
+  // inline variables; other global initialization is always single-threaded
+  // or (through lazy dynamic loading in multiple threads) unsequenced.
+  bool threadsafe = astCtx->getLangOpts().ThreadsafeStatics &&
+                    (varDecl.isLocalVarDecl() || nonTemplateInline) &&
+                    !varDecl.getTLSKind();
+
+  // If we have a global variable with internal linkage and thread-safe
+  // statics are disabled, we can just let the guard variable be of type i8.
+  bool useInt8GuardVariable = !threadsafe && globalOp.hasInternalLinkage();
+
+  cir::IntType guardTy;
+  clang::CharUnits guardAlignment;
+  if (useInt8GuardVariable) {
+    guardTy = cir::IntType::get(&getContext(), 8, /*isSigned=*/true);
+    guardAlignment = clang::CharUnits::One();
+  } else {
+    // Guard variables are 64 bits in the generic ABI and size width on ARM
+    // (i.e. 32-bit on AArch32, 64-bit on AArch64).
+    if (::cir::MissingFeatures::useARMGuardVarABI()) {
+      llvm_unreachable("NYI");
+    } else {
+      guardTy = cir::IntType::get(&getContext(), 64, /*isSigned=*/true);
+      cir::CIRDataLayout dataLayout(theModule);
+      guardAlignment =
+          clang::CharUnits::fromQuantity(dataLayout.getABITypeAlign(guardTy));
+    }
+  }
+  auto guardPtrTy = cir::PointerType::get(guardTy);
+
+  // Create the guard variable if we don't already have it (as we might if
+  // we're double-emitting this function body).
+  cir::GlobalOp guard = getStaticLocalDeclGuardAddress(varDecl);
+  if (!guard) {
+    // Mangle the name for the guard.
+    SmallString<256> guardName;
+    {
+      llvm::raw_svector_ostream out(guardName);
+      varDecl.mangleStaticGuardVariable(out);
+    }
+
+    // Create the guard variable with a zero-initializer.
+    // Just absorb linkage, visibility and dll storage class from the guarded
+    // variable.
+    guard = createGuardGlobalOp(
+        *builder, globalOp->getLoc(), guardName, guardTy,
+        /*isConstant=*/false, /*addrSpace=*/{}, globalOp.getLinkage(),
+        getGlobalOp->getParentOfType<cir::FuncOp>());
+    guard.setInitialValueAttr(cir::IntAttr::get(guardTy, 0));
+    guard.setDSOLocal(globalOp.isDSOLocal());
+    guard.setVisibility(globalOp.getVisibility());
+    assert(!::cir::MissingFeatures::setDLLStorageClass());
+    // guard.setDLLStorageClass(globalOp.getDLLStorageClass());
+    // If the variable is thread-local, so is its guard variable.
+    assert(!::cir::MissingFeatures::threadLocal());
+    // guard.setThreadLocalMode(globalOp.getThreadLocalMode());
+    guard.setAlignment(guardAlignment.getAsAlign().value());
+
+    // The ABI says: "It is suggested that it be emitted in the same COMDAT
+    // group as the associated data object." In practice, this doesn't work
+    // for non-ELF and non-Wasm object formats, so only do it for ELF and
+    // Wasm.
+    assert(!::cir::MissingFeatures::setComdat());
+
+    setStaticLocalDeclGuardAddress(varDecl, guard);
+  }
+
+  mlir::Value getGuard = builder->createGetGlobal(guard, /*threadLocal*/ false);
+  clang::CIRGen::Address guardAddr =
+      clang::CIRGen::Address(getGuard, guard.getSymType(), guardAlignment);
+
+  // Test whether the variable has completed initialization.
+  //
+  // Itanium C++ ABI 3.3.2:
+  //   The following is pseudo-code showing how these functions can be used:
+  //     if (obj_guard.first_byte == 0) {
+  //       if ( __cxa_guard_acquire (&obj_guard) ) {
+  //         try {
+  //           ... initialize the object ...;
+  //         } catch (...) {
+  //           __cxa_guard_abort (&obj_guard);
+  //           throw;
+  //         }
+  //         ... queue object destructor with __cxa_atexit() ...;
+  //         __cxa_guard_release (&obj_guard);
+  //       }
+  //     }
+  //
+  // If threadsafe statics are enabled, but we don't have inline atomics, just
+  // call __cxa_guard_acquire unconditionally.  The "inline" check isn't
+  // actually inline, and the user might not expect calls to __atomic
+  // libcalls.
+  unsigned maxInlineWidthInbits =
+      astCtx->getTargetInfo().getMaxAtomicInlineWidth();
+
+  // Emits the initializer itself followed by the guard release.
+  auto initBlock = [&]() {
+    // CIR: Move the initializer from the globalOp's ctor region into the
+    // current block.
+    // TODO(CIR): Once we support exceptions we'll need to walk the ctor region
+    // to change calls to invokes.
+    auto &ctorRegion = globalOp.getCtorRegion();
+    assert(!ctorRegion.empty() && "This should never be empty here.");
+    if (!ctorRegion.hasOneBlock())
+      llvm_unreachable("Multiple blocks NYI");
+    Block &block = ctorRegion.front();
+    Block *insertBlock = builder->getInsertionBlock();
+    // Everything but the last operation of the ctor block is moved over.
+    insertBlock->getOperations().splice(insertBlock->end(),
+                                        block.getOperations(), block.begin(),
+                                        std::prev(block.end()));
+    builder->setInsertionPointToEnd(insertBlock);
+
+    ctorRegion.getBlocks().clear();
+
+    if (threadsafe) {
+      // NOTE(CIR): CodeGen clears the above pushed CallGuardAbort here and thus
+      // the __guard_abort gets inserted. We'll have to figure out how to
+      // properly handle this when supporting static locals with exceptions.
+
+      // Call __cxa_guard_release. This cannot throw.
+      emitNounwindRuntimeCall(*builder, globalOp->getLoc(),
+                              getGuardReleaseFn(*astCtx, getContext(),
+                                                theModule, *builder,
+                                                guardPtrTy),
+                              guardAddr.emitRawPointer());
+    } else if (varDecl.isLocalVarDecl()) {
+      llvm_unreachable("NYI");
+    }
+  };
+
+  // The semantics of dynamic initialization of variables with static or
+  // thread storage duration depends on whether they are declared at
+  // block-scope. The initialization of such variables at block-scope can be
+  // aborted with an exception and later retried (per C++20 [stmt.dcl]p4), and
+  // recursive entry to their initialization has undefined behavior (also per
+  // C++20 [stmt.dcl]p4). For such variables declared at non-block scope,
+  // exceptions lead to termination (per C++20 [except.terminate]p1), and
+  // recursive references to the variables are governed only by the lifetime
+  // rules (per C++20 [class.cdtor]p2), which means such references are
+  // perfectly fine as long as they avoid touching memory. As a result,
+  // block-scope variables must not be marked as initialized until after
+  // initialization completes (unless the mark is reverted following an
+  // exception), but non-block-scope variables must be marked prior to
+  // initialization so that recursive accesses during initialization do not
+  // restart initialization.
+
+  // Emits the __cxa_guard_acquire call and, nested under a test of its result,
+  // the initializer.
+  // NOTE(review): when thread-safe statics are disabled and the variable is a
+  // local, this emits nothing and initBlock is never reached, so the
+  // initializer stays in the ctor region. Presumably NYI; confirm.
+  auto guardAcquireBlock = [&]() {
+    if (threadsafe) {
+      auto loc = globalOp->getLoc();
+      // Call __cxa_guard_acquire.
+      mlir::Value value = emitNounwindRuntimeCall(
+          *builder, loc,
+          getGuardAcquireFn(*astCtx, getContext(), theModule, *builder,
+                            guardPtrTy),
+          guardAddr.emitRawPointer());
+
+      auto isNotNull = builder->createIsNotNull(loc, value);
+      builder->create<cir::IfOp>(globalOp.getLoc(), isNotNull,
+                                 /*=withElseRegion*/ false,
+                                 [&](mlir::OpBuilder &, mlir::Location) {
+                                   initBlock();
+                                   builder->createYield(getGlobalOp->getLoc());
+                                 });
+
+      // NOTE(CIR): CodeGen pushes a CallGuardAbort cleanup here, but we are
+      // synthesizing the outcome via walking the CIR in the ctor region and
+      // changing calls to invokes.
+
+    } else if (!varDecl.isLocalVarDecl()) {
+      llvm_unreachable("NYI");
+    }
+  };
+
+  if (!threadsafe || maxInlineWidthInbits) {
+    // Load the first byte of the guard variable.
+    mlir::Value load = builder->createAlignedLoad(
+        getGlobalOp.getLoc(), builder->getSInt8Ty(), guardAddr.getPointer(),
+        guardAddr.getAlignment());
+
+    // Itanium ABI:
+    //   An implementation supporting thread-safety on multiprocessor systems
+    //   must also guarantee that references to the initialized object do not
+    //   occur before the load of the initialization flag.
+    //
+    // In LLVM, we do this by marking the load Acquire.
+    if (threadsafe)
+      cast<cir::LoadOp>(load.getDefiningOp()).setAtomic(cir::MemOrder::Acquire);
+
+    // For ARM, we should only check the first bit, rather than the entire
+    // byte:
+    //
+    // ARM C++ ABI 3.2.3.1:
+    //   To support the potential use of initialization guard variables
+    //   as semaphores that are the target of ARM SWP and LDREX/STREX
+    //   synchronizing instructions we define a static initialization
+    //   guard variable to be a 4-byte aligned, 4-byte word with the
+    //   following inline access protocol.
+    //     #define INITIALIZED 1
+    //     if ((obj_guard & INITIALIZED) != INITIALIZED) {
+    //       if (__cxa_guard_acquire(&obj_guard))
+    //         ...
+    //     }
+    //
+    // and similarly for ARM64:
+    //
+    // ARM64 C++ ABI 3.2.2:
+    //   This ABI instead only specifies the value bit 0 of the static guard
+    //   variable; all other bits are platform defined. Bit 0 shall be 0 when
+    //   the variable is not initialized and 1 when it is.
+    if (MissingFeatures::useARMGuardVarABI() && !useInt8GuardVariable)
+      llvm_unreachable("NYI");
+    mlir::Value constOne = builder->getConstInt(
+        getGlobalOp->getLoc(), llvm::APSInt(llvm::APInt(8, 1),
+                                            /*isUnsigned=*/false));
+    // NOTE(review): the bit-0 mask is applied when the ARM guard ABI is NOT in
+    // use, whereas the comment above describes the bit-0 test as ARM-specific.
+    // Confirm the condition is intended.
+    mlir::Value value =
+        (!cir::MissingFeatures::useARMGuardVarABI() && !useInt8GuardVariable)
+            ? builder->createAnd(load, constOne)
+            : load;
+    mlir::Value needsInit =
+        builder->createIsNull(globalOp.getLoc(), value, "guard.uninitialized");
+
+    builder->create<cir::IfOp>(globalOp.getLoc(), needsInit,
+                               /*=withElseRegion*/ false,
+                               [&](mlir::OpBuilder &, mlir::Location) {
+                                 if (MissingFeatures::metaDataNode())
+                                   llvm_unreachable("NYI");
+                                 guardAcquireBlock();
+                                 builder->createYield(getGlobalOp->getLoc());
+                               });
+  } else {
+    // Thread-safe statics without inline atomics: skip the inline test of the
+    // guard byte and call __cxa_guard_acquire unconditionally. Without this
+    // branch no initialization code would be emitted at all.
+    guardAcquireBlock();
+  }
+
+  // Put the terminator removed at the top back at the end of the block.
+  builder->setInsertionPointToEnd(getGlobalOpBlock);
+  builder->insert(ret);
 }
 
 void LoweringPreparePass::buildGlobalCtorDtorList() {
@@ -1161,8 +1686,10 @@ void LoweringPreparePass::runOnOp(Operation *op) {
     lowerThreeWayCmpOp(threeWayCmp);
   } else if (auto vaArgOp = dyn_cast<VAArgOp>(op)) {
     lowerVAArgOp(vaArgOp);
-  } else if (auto getGlobal = dyn_cast<GlobalOp>(op)) {
-    lowerGlobalOp(getGlobal);
+  } else if (auto global = dyn_cast<GlobalOp>(op)) {
+    lowerGlobalOp(global);
+  } else if (auto getGlobal = dyn_cast<GetGlobalOp>(op)) {
+    lowerGetGlobalOp(getGlobal);
   } else if (auto dynamicCast = dyn_cast<DynamicCastOp>(op)) {
     lowerDynamicCastOp(dynamicCast);
   } else if (auto stdFind = dyn_cast<StdFindOp>(op)) {
@@ -1201,8 +1728,8 @@ void LoweringPreparePass::runOnOperation() {
 
   op->walk([&](Operation *op) {
     if (isa<UnaryOp, BinOp, CastOp, ComplexBinOp, CmpThreeWayOp, VAArgOp,
-            GlobalOp, DynamicCastOp, StdFindOp, IterEndOp, IterBeginOp,
-            ArrayCtor, ArrayDtor, cir::FuncOp, StoreOp>(op))
+            GetGlobalOp, GlobalOp, DynamicCastOp, StdFindOp, IterEndOp,
+            IterBeginOp, ArrayCtor, ArrayDtor, cir::FuncOp, StoreOp>(op))
       opsToTransform.push_back(op);
   });
 
diff --git a/clang/test/CIR/CodeGen/static-local.cpp b/clang/test/CIR/CodeGen/static-local.cpp
new file mode 100644
index 000000000000..4d2005098dec
--- /dev/null
+++ b/clang/test/CIR/CodeGen/static-local.cpp
@@ -0,0 +1,78 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-cir -clangir-disable-passes %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIRGEN
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s --check-prefix=LLVM
+
+int fnA();
+
+// A function-local static whose initializer is a call (to fnA), so it cannot
+// be constant-initialized.
+void foo() {
+  static int val = fnA();
+}
+
+
+//      CIRGEN: cir.func private @_Z3fnAv() -> !s32i attributes
+// CIRGEN-NEXT: cir.global "private" internal dsolocal @_ZZ3foovE3val = ctor : !s32i {
+// CIRGEN-NEXT:   %0 = cir.get_global @_ZZ3foovE3val : !cir.ptr<!s32i>
+// CIRGEN-NEXT:   %1 = cir.call @_Z3fnAv() : () -> !s32i
+// CIRGEN-NEXT:   cir.store %1, %0 : !s32i, !cir.ptr<!s32i>
+// CIRGEN-NEXT: } {alignment = 4 : i64, ast = #cir.var.decl.ast, static_local}
+// CIRGEN-NEXT: cir.func @_Z3foov() attributes
+// CIRGEN-NEXT:   %0 = cir.get_global @_ZZ3foovE3val : !cir.ptr<!s32i> {static_local}
+// CIRGEN-NEXT:   cir.return
+// CIRGEN-NEXT: }
+
+//      CIR: cir.func private @__cxa_guard_release(!cir.ptr<!s64i>)
+//      CIR: cir.func private @__cxa_guard_acquire(!cir.ptr<!s64i>) -> !s32i
+//      CIR: cir.func private @_Z3fnAv() -> !s32i
+//      CIR: cir.global "private" internal dsolocal @_ZZ3foovE3val = #cir.int<0> : !s32i {alignment = 4 : i64, ast = #cir.var.decl.ast, static_local}
+//      CIR: cir.global "private" internal dsolocal @_ZGVZ3foovE3val = #cir.int<0> : !s64i {alignment = 8 : i64}
+//      CIR: cir.func @_Z3foov() extra(#fn_attr) {
+// CIR-NEXT:   %0 = cir.get_global @_ZZ3foovE3val : !cir.ptr<!s32i> {static_local}
+// CIR-NEXT:   %1 = cir.get_global @_ZGVZ3foovE3val : !cir.ptr<!s64i>
+// CIR-NEXT:   %2 = cir.cast(bitcast, %1 : !cir.ptr<!s64i>), !cir.ptr<!s8i>
+// CIR-NEXT:   %3 = cir.load align(8) atomic(acquire) %2 : !cir.ptr<!s8i>, !s8i
+// CIR-NEXT:   %4 = cir.const #cir.int<1> : !s8i
+// CIR-NEXT:   %5 = cir.binop(and, %3, %4) : !s8i
+// CIR-NEXT:   %6 = cir.const #cir.int<0> : !s8i
+// CIR-NEXT:   %7 = cir.cmp(eq, %5, %6) : !s8i, !cir.bool
+// CIR-NEXT:   cir.if %7 {
+// CIR-NEXT:     %8 = cir.call @__cxa_guard_acquire(%1) : (!cir.ptr<!s64i>) -> !s32i
+// CIR-NEXT:     %9 = cir.const #cir.int<0> : !s32i
+// CIR-NEXT:     %10 = cir.cmp(ne, %8, %9) : !s32i, !cir.bool
+// CIR-NEXT:     cir.if %10 {
+// CIR-NEXT:       %11 = cir.get_global @_ZZ3foovE3val : !cir.ptr<!s32i>
+// CIR-NEXT:       %12 = cir.call @_Z3fnAv() : () -> !s32i
+// CIR-NEXT:       cir.store %12, %11 : !s32i, !cir.ptr<!s32i>
+// CIR-NEXT:       cir.call @__cxa_guard_release(%1) : (!cir.ptr<!s64i>) -> ()
+// CIR-NEXT:     }
+// CIR-NEXT:   }
+// CIR-NEXT:   cir.return
+// CIR-NEXT: }
+
+//      LLVM: @_ZZ3foovE3val = internal global i32 0, align 4
+//      LLVM: @_ZGVZ3foovE3val = internal global i64 0, align 8
+//      LLVM: declare void @__cxa_guard_release(ptr)
+//      LLVM: declare i32 @__cxa_guard_acquire(ptr)
+//      LLVM: declare i32 @_Z3fnAv()
+
+//      LLVM: define dso_local void @_Z3foov()
+// LLVM-NEXT:   %1 = load atomic i8, ptr @_ZGVZ3foovE3val acquire, align 8
+// LLVM-NEXT:   %2 = and i8 %1, 1
+// LLVM-NEXT:   %3 = icmp eq i8 %2, 0
+// LLVM-NEXT:   br i1 %3, label %4, label %10
+//  LLVM-DAG: 4:
+// LLVM-NEXT:   %5 = call i32 @__cxa_guard_acquire(ptr @_ZGVZ3foovE3val)
+// LLVM-NEXT:   %6 = icmp ne i32 %5, 0
+// LLVM-NEXT:   br i1 %6, label %7, label %9
+//  LLVM-DAG: 7:
+// LLVM-NEXT:   %8 = call i32 @_Z3fnAv()
+// LLVM-NEXT:   store i32 %8, ptr @_ZZ3foovE3val, align 4
+// LLVM-NEXT:   call void @__cxa_guard_release(ptr @_ZGVZ3foovE3val)
+// LLVM-NEXT:   br label %9
+//  LLVM-DAG: 9:
+// LLVM-NEXT:   br label %10
+//  LLVM-DAG: 10:
+// LLVM-NEXT:   ret void
+// LLVM-NEXT: }